On Mon, Sep 15, 2025 at 09:36:38AM -0700, Kalesh Singh wrote:
Needed observability on in field devices can be collected with minimal overhead and can be toggled on and off. Event driven telemetry can be done with tracepoint BPF programs.
The process comm is provided for aggregation across devices and tgid is to enable per-process aggregation per device.
This allows for observing the distribution of such problems in the field, to deduce if there are legitimate bugs or if a bump to the limit is warranted.
It's not really a bug though is it? It's somebody running out of resources.
I'm not sure how useful this is really. But I'm open to being convinced!
I also wonder if this is better as a statistic? You'd figure out it was a problem that way too right?
Cc: Andrew Morton akpm@linux-foundation.org Cc: David Hildenbrand david@redhat.com Cc: "Liam R. Howlett" Liam.Howlett@oracle.com Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Mike Rapoport rppt@kernel.org Cc: Minchan Kim minchan@kernel.org Cc: Pedro Falcato pfalcato@suse.de Signed-off-by: Kalesh Singh kaleshsingh@google.com
This breaks the VMA tests, please make sure to always check them:
cc -I../shared -I. -I../../include -I../../arch/x86/include -I../../../lib -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined -c -o vma.o vma.c In file included from vma.c:33: ../../../mm/vma.c:10:10: fatal error: trace/events/vma.h: No such file or directory 10 | #include <trace/events/vma.h> | ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make: *** [<builtin>: vma.o] Error 1
See below though, you've included this in the wrong place (I don't blame you, perhaps we've not made this _very_ clear :)
Chnages in v2:
- Add needed observability for operations failing due to the vma count limit, per Minchan (Since there isn't a common point for debug logging due checks being external to the capacity based vma_count_remaining() helper. I used a trace event for low overhead and to facilitate event driven telemetry for in field devices)
include/trace/events/vma.h | 32 ++++++++++++++++++++++++++++++++
Do we really need a new file? We already have VMA-related tracepoints no?
Also if you add a new file you _have_ to update MAINTAINERS.
mm/mmap.c | 5 ++++- mm/mremap.c | 10 ++++++++-- mm/vma.c | 11 +++++++++-- 4 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 include/trace/events/vma.h
diff --git a/include/trace/events/vma.h b/include/trace/events/vma.h new file mode 100644 index 000000000000..2fed63b0d0a6 --- /dev/null +++ b/include/trace/events/vma.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vma
+#if !defined(_TRACE_VMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VMA_H
+#include <linux/tracepoint.h>
+TRACE_EVENT(max_vma_count_exceeded,
- TP_PROTO(struct task_struct *task),
- TP_ARGS(task),
- TP_STRUCT__entry(
__string(comm, task->comm)
__field(pid_t, tgid)
- ),
- TP_fast_assign(
__assign_str(comm);
__entry->tgid = task->tgid;
- ),
- TP_printk("comm=%s tgid=%d", __get_str(comm), __entry->tgid)
+);
+#endif /* _TRACE_VMA_H */
+/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/mm/mmap.c b/mm/mmap.c index 30ddd550197e..0bb311bf48f3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -56,6 +56,7 @@
#define CREATE_TRACE_POINTS #include <trace/events/mmap.h> +#include <trace/events/vma.h>
#include "internal.h"
@@ -374,8 +375,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return -EOVERFLOW;
/* Too many mappings? */
- if (!vma_count_remaining(mm))
if (!vma_count_remaining(mm)) {
trace_max_vma_count_exceeded(current);
return -ENOMEM;
}
/*
- addr is returned from get_unmapped_area,
diff --git a/mm/mremap.c b/mm/mremap.c index 14d35d87e89b..f42ac05f0069 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -30,6 +30,8 @@ #include <asm/tlb.h> #include <asm/pgalloc.h>
+#include <trace/events/vma.h>
#include "internal.h"
/* Classify the kind of remap operation being performed. */ @@ -1040,8 +1042,10 @@ static unsigned long prep_move_vma(struct vma_remap_struct *vrm) * We'd prefer to avoid failure later on in do_munmap: * which may split one vma into three before unmapping. */
- if (vma_count_remaining(current->mm) < 4)
- if (vma_count_remaining(current->mm) < 4) {
trace_max_vma_count_exceeded(current);
But we didn't exceed it, we're guessing we might?
return -ENOMEM;
}
if (vma->vm_ops && vma->vm_ops->may_split) { if (vma->vm_start != old_addr)
@@ -1817,8 +1821,10 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm) * the threshold. In other words, is the current map count + 6 at or * below the threshold? Otherwise return -ENOMEM here to be more safe. */
- if (vma_count_remaining(current->mm) < 6)
- if (vma_count_remaining(current->mm) < 6) {
trace_max_vma_count_exceeded(current);
Similar point here.
return -ENOMEM;
}
return 0;
} diff --git a/mm/vma.c b/mm/vma.c index 0e4fcaebe209..692c33c3e84d 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -7,6 +7,8 @@ #include "vma_internal.h" #include "vma.h"
+#include <trace/events/vma.h>
Nope you don't do this :)
vma.c is designed to be userland testable and _intentionally_ puts all its includes in vma_internal.h.
So you'd need to add this include over there, and then update the vma tests vma_internal.h to stub out the trace function.
struct mmap_state { struct mm_struct *mm; struct vma_iterator *vmi; @@ -621,8 +623,10 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long addr, int new_below) {
- if (!vma_count_remaining(vma->vm_mm))
if (!vma_count_remaining(vma->vm_mm)) {
trace_max_vma_count_exceeded(current);
return -ENOMEM;
}
return __split_vma(vmi, vma, addr, new_below);
} @@ -1375,6 +1379,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, */ if (vms->end < vms->vma->vm_end && !vma_count_remaining(vms->vma->vm_mm)) {
}trace_max_vma_count_exceeded(current); error = -ENOMEM; goto vma_count_exceeded;
@@ -2801,8 +2806,10 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) return -ENOMEM;
- if (!vma_count_remaining(mm))
if (!vma_count_remaining(mm)) {
trace_max_vma_count_exceeded(current);
return -ENOMEM;
}
if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) return -ENOMEM;
-- 2.51.0.384.g4c02a37b29-goog
Cheers, Lorenzo