From: Pu Lehui pulehui@huawei.com
patch 1: the mainly fix for uprobe pte be overwritten issue. patch 2: WARN_ON_ONCE for new_pte not NULL during move_ptes. patch 3: extract some utils function for upcomming selftest. patch 4: selftest related to this series.
v1: - limit skip uprobe_mmap to copy_vma flow. - add related selftest. - correct Fixes tag.
RFC v2: https://lore.kernel.org/all/20250527132351.2050820-1-pulehui@huaweicloud.com... - skip uprobe_mmap on expanded vma. - add skip_vma_uprobe field to struct vma_prepare and vma_merge_struct. (Lorenzo) - add WARN_ON_ONCE when new_pte is not NULL. (Oleg) - Corrected some of the comments.
RFC v1: https://lore.kernel.org/all/20250521092503.3116340-1-pulehui@huaweicloud.com...
Pu Lehui (4): mm: Fix uprobe pte be overwritten when expanding vma mm: Expose abnormal new_pte during move_ptes selftests/mm: Extract read_sysfs and write_sysfs into vm_util selftests/mm: Add test about uprobe pte be orphan during vma merge
mm/mremap.c | 2 ++ mm/vma.c | 20 ++++++++++-- mm/vma.h | 7 +++++ tools/testing/selftests/mm/ksm_tests.c | 32 ++------------------ tools/testing/selftests/mm/merge.c | 42 ++++++++++++++++++++++++++ tools/testing/selftests/mm/thuge-gen.c | 6 ++-- tools/testing/selftests/mm/vm_util.c | 38 +++++++++++++++++++++++ tools/testing/selftests/mm/vm_util.h | 2 ++ 8 files changed, 113 insertions(+), 36 deletions(-)
From: Pu Lehui pulehui@huawei.com
We encountered a BUG alert triggered by Syzkaller as follows: BUG: Bad rss-counter state mm:00000000b4a60fca type:MM_ANONPAGES val:1
And we can reproduce it with the following steps: 1. register uprobe on file at zero offset 2. mmap the file at zero offset: addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0); 3. mremap part of vma1 to new vma2: addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE); 4. mremap back to orig addr1: mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1);
In the step 3, the vma1 range [addr1, addr1 + 4096] will be remap to new vma2 with range [addr2, addr2 + 8192], and remap uprobe anon page from the vma1 to vma2, then unmap the vma1 range [addr1, addr1 + 4096]. In tht step 4, the vma2 range [addr2, addr2 + 4096] will be remap back to the addr range [addr1, addr1 + 4096]. Since the addr range [addr1 + 4096, addr1 + 8192] still maps the file, it will take vma_merge_new_range to expand the range, and then do uprobe_mmap in vma_complete. Since the merged vma pgoff is also zero offset, it will install uprobe anon page to the merged vma. However, the upcomming move_page_tables step, which use set_pte_at to remap the vma2 uprobe pte to the merged vma, will overwrite the newly uprobe pte in the merged vma, and lead that pte to be orphan.
Since the uprobe pte will be remapped to the merged vma, we can remove the unnecessary uprobe_mmap upon merged vma.
This problem was first find in linux-6.6.y and also exists in the community syzkaller: https://lore.kernel.org/all/000000000000ada39605a5e71711@google.com/T/
CC: stable@vger.kernel.org Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") Suggested-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com Signed-off-by: Pu Lehui pulehui@huawei.com --- mm/vma.c | 20 +++++++++++++++++--- mm/vma.h | 7 +++++++ 2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/mm/vma.c b/mm/vma.c index 1c6595f282e5..b2d7c03d8aa4 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -169,6 +169,9 @@ static void init_multi_vma_prep(struct vma_prepare *vp, vp->file = vma->vm_file; if (vp->file) vp->mapping = vma->vm_file->f_mapping; + + if (vmg && vmg->skip_vma_uprobe) + vp->skip_vma_uprobe = true; }
/* @@ -358,10 +361,13 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
if (vp->file) { i_mmap_unlock_write(vp->mapping); - uprobe_mmap(vp->vma);
- if (vp->adj_next) - uprobe_mmap(vp->adj_next); + if (!vp->skip_vma_uprobe) { + uprobe_mmap(vp->vma); + + if (vp->adj_next) + uprobe_mmap(vp->adj_next); + } }
if (vp->remove) { @@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
+ /* + * If the VMA we are copying might contain a uprobe PTE, ensure + * that we do not establish one upon merge. Otherwise, when mremap() + * moves page tables, it will orphan the newly created PTE. + */ + if (vma->vm_file) + vmg.skip_vma_uprobe = true; + new_vma = find_vma_prev(mm, addr, &vmg.prev); if (new_vma && new_vma->vm_start < addr + len) return NULL; /* should never get here */ diff --git a/mm/vma.h b/mm/vma.h index 9a8af9be29a8..0db066e7a45d 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -19,6 +19,8 @@ struct vma_prepare { struct vm_area_struct *insert; struct vm_area_struct *remove; struct vm_area_struct *remove2; + + bool skip_vma_uprobe :1; };
struct unlink_vma_file_batch { @@ -120,6 +122,11 @@ struct vma_merge_struct { */ bool give_up_on_oom :1;
+ /* + * If set, skip uprobe_mmap upon merged vma. + */ + bool skip_vma_uprobe :1; + /* Internal flags set during merge process: */
/*
On Thu, May 29, 2025 at 03:56:47PM +0000, Pu Lehui wrote:
From: Pu Lehui pulehui@huawei.com
We encountered a BUG alert triggered by Syzkaller as follows: BUG: Bad rss-counter state mm:00000000b4a60fca type:MM_ANONPAGES val:1
And we can reproduce it with the following steps:
- register uprobe on file at zero offset
- mmap the file at zero offset: addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0);
- mremap part of vma1 to new vma2: addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE);
- mremap back to orig addr1: mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1);
In the step 3, the vma1 range [addr1, addr1 + 4096] will be remap to new vma2 with range [addr2, addr2 + 8192], and remap uprobe anon page from the vma1 to vma2, then unmap the vma1 range [addr1, addr1 + 4096]. In tht step 4, the vma2 range [addr2, addr2 + 4096] will be remap back to the addr range [addr1, addr1 + 4096]. Since the addr range [addr1 + 4096, addr1 + 8192] still maps the file, it will take vma_merge_new_range to expand the range, and then do uprobe_mmap in vma_complete. Since the merged vma pgoff is also zero offset, it will install uprobe anon page to the merged vma. However, the upcomming move_page_tables step, which use set_pte_at to remap the vma2 uprobe pte to the merged vma, will overwrite the newly uprobe pte in the merged vma, and lead that pte to be orphan.
Since the uprobe pte will be remapped to the merged vma, we can remove the unnecessary uprobe_mmap upon merged vma.
This problem was first find in linux-6.6.y and also exists in the community syzkaller: https://lore.kernel.org/all/000000000000ada39605a5e71711@google.com/T/
CC: stable@vger.kernel.org Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") Suggested-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com Signed-off-by: Pu Lehui pulehui@huawei.com
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
mm/vma.c | 20 +++++++++++++++++--- mm/vma.h | 7 +++++++ 2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/mm/vma.c b/mm/vma.c index 1c6595f282e5..b2d7c03d8aa4 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -169,6 +169,9 @@ static void init_multi_vma_prep(struct vma_prepare *vp, vp->file = vma->vm_file; if (vp->file) vp->mapping = vma->vm_file->f_mapping;
- if (vmg && vmg->skip_vma_uprobe)
vp->skip_vma_uprobe = true;
}
/* @@ -358,10 +361,13 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
if (vp->file) { i_mmap_unlock_write(vp->mapping);
uprobe_mmap(vp->vma);
if (vp->adj_next)
uprobe_mmap(vp->adj_next);
if (!vp->skip_vma_uprobe) {
uprobe_mmap(vp->vma);
if (vp->adj_next)
uprobe_mmap(vp->adj_next);
}
}
if (vp->remove) {
@@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
- /*
* If the VMA we are copying might contain a uprobe PTE, ensure
* that we do not establish one upon merge. Otherwise, when mremap()
* moves page tables, it will orphan the newly created PTE.
*/
- if (vma->vm_file)
vmg.skip_vma_uprobe = true;
- new_vma = find_vma_prev(mm, addr, &vmg.prev); if (new_vma && new_vma->vm_start < addr + len) return NULL; /* should never get here */
diff --git a/mm/vma.h b/mm/vma.h index 9a8af9be29a8..0db066e7a45d 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -19,6 +19,8 @@ struct vma_prepare { struct vm_area_struct *insert; struct vm_area_struct *remove; struct vm_area_struct *remove2;
- bool skip_vma_uprobe :1;
};
struct unlink_vma_file_batch { @@ -120,6 +122,11 @@ struct vma_merge_struct { */ bool give_up_on_oom :1;
/*
* If set, skip uprobe_mmap upon merged vma.
*/
bool skip_vma_uprobe :1;
/* Internal flags set during merge process: */
/*
-- 2.34.1
if (vp->remove) { @@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
- /*
* If the VMA we are copying might contain a uprobe PTE, ensure
* that we do not establish one upon merge. Otherwise, when mremap()
* moves page tables, it will orphan the newly created PTE.
*/
- if (vma->vm_file)
vmg.skip_vma_uprobe = true;
Assuming we extend the VMA on the way (not merge), would we handle that properly?
Or is that not possible on this code path or already broken either way?
On Fri, May 30, 2025 at 08:51:14PM +0200, David Hildenbrand wrote:
if (vp->remove) { @@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
- /*
* If the VMA we are copying might contain a uprobe PTE, ensure
* that we do not establish one upon merge. Otherwise, when mremap()
* moves page tables, it will orphan the newly created PTE.
*/
- if (vma->vm_file)
vmg.skip_vma_uprobe = true;
Assuming we extend the VMA on the way (not merge), would we handle that properly?
Or is that not possible on this code path or already broken either way?
I'm not sure in what context you mean expand, vma_merge_new_range() calls vma_expand() so we call an expand a merge here, and this flag will be obeyed.
vma_merge_new_range() -> vma_expand() -> commit_merge() -> vma_complete() will ensure expected behaviour.
-- Cheers,
David / dhildenb
On 02.06.25 13:55, Lorenzo Stoakes wrote:
On Fri, May 30, 2025 at 08:51:14PM +0200, David Hildenbrand wrote:
if (vp->remove) {
@@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
- /*
* If the VMA we are copying might contain a uprobe PTE, ensure
* that we do not establish one upon merge. Otherwise, when mremap()
* moves page tables, it will orphan the newly created PTE.
*/
- if (vma->vm_file)
vmg.skip_vma_uprobe = true;
Assuming we extend the VMA on the way (not merge), would we handle that properly?
Or is that not possible on this code path or already broken either way?
I'm not sure in what context you mean expand, vma_merge_new_range() calls vma_expand() so we call an expand a merge here, and this flag will be obeyed.
Essentially, an mremap() that grows an existing mapping while moving it.
Assume we have
[ VMA 0 ] [ VMA X]
And want to grow VMA 0 by 1 page.
We cannot grow in-place, so we'll have to copy VMA 0 to another VMA, and while at it, expand it by 1 page.
expand_vma()->move_vma()->copy_vma_and_data()->copy_vma()
But maybe I'm getting lost in the code. (e.g., expand_vma() vs. vma_expand() ... confusing :) )
On Mon, Jun 02, 2025 at 02:26:21PM +0200, David Hildenbrand wrote:
On 02.06.25 13:55, Lorenzo Stoakes wrote:
On Fri, May 30, 2025 at 08:51:14PM +0200, David Hildenbrand wrote:
if (vp->remove) {
@@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
- /*
* If the VMA we are copying might contain a uprobe PTE, ensure
* that we do not establish one upon merge. Otherwise, when mremap()
* moves page tables, it will orphan the newly created PTE.
*/
- if (vma->vm_file)
vmg.skip_vma_uprobe = true;
Assuming we extend the VMA on the way (not merge), would we handle that properly?
Or is that not possible on this code path or already broken either way?
I'm not sure in what context you mean expand, vma_merge_new_range() calls vma_expand() so we call an expand a merge here, and this flag will be obeyed.
Essentially, an mremap() that grows an existing mapping while moving it.
Assume we have
[ VMA 0 ] [ VMA X]
And want to grow VMA 0 by 1 page.
We cannot grow in-place, so we'll have to copy VMA 0 to another VMA, and while at it, expand it by 1 page.
expand_vma()->move_vma()->copy_vma_and_data()->copy_vma()
OK so in that case you'd not have a merge at all, you'd have a new VMA and all would be well and beautiful :) or I mean hopefully. Maybe?
But maybe I'm getting lost in the code. (e.g., expand_vma() vs. vma_expand() ... confusing :) )
Yeah I think Liam or somebody else called me out for this :P I mean it's accurate naming in mremap.c but that's kinda in the context of the mremap.
For VMA merging vma_expand() is used generally for a new VMA, since you're always expanding into the gap, but because we all did terrible things in past lives also called by relocate_vma_down() which is a kinda-hack for initial stack relocation on initial process setup.
It maybe needs renaming... But expand kinda accurately describes what's going on just semi-overloaded vs. mremap() now :>)
VMA merge code now at least readable enough that you can pick up on the various oddnesses clearly :P
-- Cheers,
David / dhildenb
On 02.06.25 15:26, Lorenzo Stoakes wrote:
On Mon, Jun 02, 2025 at 02:26:21PM +0200, David Hildenbrand wrote:
On 02.06.25 13:55, Lorenzo Stoakes wrote:
On Fri, May 30, 2025 at 08:51:14PM +0200, David Hildenbrand wrote:
if (vp->remove) {
@@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
- /*
* If the VMA we are copying might contain a uprobe PTE, ensure
* that we do not establish one upon merge. Otherwise, when mremap()
* moves page tables, it will orphan the newly created PTE.
*/
- if (vma->vm_file)
vmg.skip_vma_uprobe = true;
Assuming we extend the VMA on the way (not merge), would we handle that properly?
Or is that not possible on this code path or already broken either way?
I'm not sure in what context you mean expand, vma_merge_new_range() calls vma_expand() so we call an expand a merge here, and this flag will be obeyed.
Essentially, an mremap() that grows an existing mapping while moving it.
Assume we have
[ VMA 0 ] [ VMA X]
And want to grow VMA 0 by 1 page.
We cannot grow in-place, so we'll have to copy VMA 0 to another VMA, and while at it, expand it by 1 page.
expand_vma()->move_vma()->copy_vma_and_data()->copy_vma()
OK so in that case you'd not have a merge at all, you'd have a new VMA and all would be well and beautiful :) or I mean hopefully. Maybe?
I'm really not sure. :)
Could there be some very odd cases like
[VMA 0 ][ VMA 1 ][ VMA X]
and when we mremap() [ VMA 1 ] to grow, we would place it before [VMA 0 ], and just by pure lick end up merging with that if the ranges match?
We're in the corner cases now, ... so this might not be relevant. But I hope we can clean up that uprobe mmap call later ...
But maybe I'm getting lost in the code. (e.g., expand_vma() vs. vma_expand() ... confusing :) )
Yeah I think Liam or somebody else called me out for this :P I mean it's accurate naming in mremap.c but that's kinda in the context of the mremap.
For VMA merging vma_expand() is used generally for a new VMA, since you're always expanding into the gap, but because we all did terrible things in past lives also called by relocate_vma_down() which is a kinda-hack for initial stack relocation on initial process setup.
It maybe needs renaming... But expand kinda accurately describes what's going on just semi-overloaded vs. mremap() now :>)
VMA merge code now at least readable enough that you can pick up on the various oddnesses clearly :P
:)
On Mon, Jun 02, 2025 at 06:28:58PM +0200, David Hildenbrand wrote:
On 02.06.25 15:26, Lorenzo Stoakes wrote:
On Mon, Jun 02, 2025 at 02:26:21PM +0200, David Hildenbrand wrote:
On 02.06.25 13:55, Lorenzo Stoakes wrote:
On Fri, May 30, 2025 at 08:51:14PM +0200, David Hildenbrand wrote:
if (vp->remove) {
@@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, faulted_in_anon_vma = false; }
- /*
* If the VMA we are copying might contain a uprobe PTE, ensure
* that we do not establish one upon merge. Otherwise, when mremap()
* moves page tables, it will orphan the newly created PTE.
*/
- if (vma->vm_file)
vmg.skip_vma_uprobe = true;
Assuming we extend the VMA on the way (not merge), would we handle that properly?
Or is that not possible on this code path or already broken either way?
I'm not sure in what context you mean expand, vma_merge_new_range() calls vma_expand() so we call an expand a merge here, and this flag will be obeyed.
Essentially, an mremap() that grows an existing mapping while moving it.
Assume we have
[ VMA 0 ] [ VMA X]
And want to grow VMA 0 by 1 page.
We cannot grow in-place, so we'll have to copy VMA 0 to another VMA, and while at it, expand it by 1 page.
expand_vma()->move_vma()->copy_vma_and_data()->copy_vma()
OK so in that case you'd not have a merge at all, you'd have a new VMA and all would be well and beautiful :) or I mean hopefully. Maybe?
I'm really not sure. :)
Could there be some very odd cases like
[VMA 0 ][ VMA 1 ][ VMA X]
and when we mremap() [ VMA 1 ] to grow, we would place it before [VMA 0 ], and just by pure lick end up merging with that if the ranges match?
When we invoke copy_vma() we pass vrm->new_addr and vrm->new_len so this would trigger a merge and the correct uprobe handling.
Since we just don't trigger the breakpoint install in this situation, we'd correctly move over the breakpoint to the right position, and overwrite anything we expanded into.
I do want to do a mremap doc actually to cover all the weird cases, because there's some weird stuff in there and it's worth covering off stuff for users and stuff for kernel people :)
We're in the corner cases now, ... so this might not be relevant. But I hope we can clean up that uprobe mmap call later ...
Yeah with this initial fix in we can obviously revisit as needed!
But maybe I'm getting lost in the code. (e.g., expand_vma() vs. vma_expand() ... confusing :) )
Yeah I think Liam or somebody else called me out for this :P I mean it's accurate naming in mremap.c but that's kinda in the context of the mremap.
For VMA merging vma_expand() is used generally for a new VMA, since you're always expanding into the gap, but because we all did terrible things in past lives also called by relocate_vma_down() which is a kinda-hack for initial stack relocation on initial process setup.
It maybe needs renaming... But expand kinda accurately describes what's going on just semi-overloaded vs. mremap() now :>)
VMA merge code now at least readable enough that you can pick up on the various oddnesses clearly :P
:)
-- Cheers,
David / dhildenb
Cheers, Lorenzo
On 02.06.25 19:01, Lorenzo Stoakes wrote:
On Mon, Jun 02, 2025 at 06:28:58PM +0200, David Hildenbrand wrote:
On 02.06.25 15:26, Lorenzo Stoakes wrote:
On Mon, Jun 02, 2025 at 02:26:21PM +0200, David Hildenbrand wrote:
On 02.06.25 13:55, Lorenzo Stoakes wrote:
On Fri, May 30, 2025 at 08:51:14PM +0200, David Hildenbrand wrote:
> if (vp->remove) { > @@ -1823,6 +1829,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, > faulted_in_anon_vma = false; > } > + /* > + * If the VMA we are copying might contain a uprobe PTE, ensure > + * that we do not establish one upon merge. Otherwise, when mremap() > + * moves page tables, it will orphan the newly created PTE. > + */ > + if (vma->vm_file) > + vmg.skip_vma_uprobe = true; > +
Assuming we extend the VMA on the way (not merge), would we handle that properly?
Or is that not possible on this code path or already broken either way?
I'm not sure in what context you mean expand, vma_merge_new_range() calls vma_expand() so we call an expand a merge here, and this flag will be obeyed.
Essentially, an mremap() that grows an existing mapping while moving it.
Assume we have
[ VMA 0 ] [ VMA X]
And want to grow VMA 0 by 1 page.
We cannot grow in-place, so we'll have to copy VMA 0 to another VMA, and while at it, expand it by 1 page.
expand_vma()->move_vma()->copy_vma_and_data()->copy_vma()
OK so in that case you'd not have a merge at all, you'd have a new VMA and all would be well and beautiful :) or I mean hopefully. Maybe?
I'm really not sure. :)
Could there be some very odd cases like
[VMA 0 ][ VMA 1 ][ VMA X]
and when we mremap() [ VMA 1 ] to grow, we would place it before [VMA 0 ], and just by pure lick end up merging with that if the ranges match?
When we invoke copy_vma() we pass vrm->new_addr and vrm->new_len so this would trigger a merge and the correct uprobe handling.
Since we just don't trigger the breakpoint install in this situation, we'd correctly move over the breakpoint to the right position, and overwrite anything we expanded into.
I do want to do a mremap doc actually to cover all the weird cases, because there's some weird stuff in there and it's worth covering off stuff for users and stuff for kernel people :)
We're in the corner cases now, ... so this might not be relevant. But I hope we can clean up that uprobe mmap call later ...
Yeah with this initial fix in we can obviously revisit as needed!
As Andrew was asking off-list:
Acked-by: David Hildenbrand david@redhat.com
:)
On Tue, 3 Jun 2025 14:16:37 +0200 David Hildenbrand david@redhat.com wrote:
We're in the corner cases now, ... so this might not be relevant. But I hope we can clean up that uprobe mmap call later ...
Yeah with this initial fix in we can obviously revisit as needed!
As Andrew was asking off-list:
Acked-by: David Hildenbrand david@redhat.com
OK, thanks. I'll aim to send this series in to Linus during this merge window.
From: Pu Lehui pulehui@huawei.com
When executing move_ptes, the new_pte must be NULL, otherwise it will be overwritten by the old_pte, and cause the abnormal new_pte to be leaked. In order to make this problem to be more explicit, let's add WARN_ON_ONCE when new_pte is not NULL.
Suggested-by: Oleg Nesterov oleg@redhat.com Signed-off-by: Pu Lehui pulehui@huawei.com --- mm/mremap.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/mm/mremap.c b/mm/mremap.c index 83e359754961..4e2491f8c2ce 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) { + WARN_ON_ONCE(!pte_none(*new_pte)); + if (pte_none(ptep_get(old_pte))) continue;
On Thu, 29 May 2025 15:56:48 +0000 Pu Lehui pulehui@huaweicloud.com wrote:
From: Pu Lehui pulehui@huawei.com
When executing move_ptes, the new_pte must be NULL, otherwise it will be overwritten by the old_pte, and cause the abnormal new_pte to be leaked. In order to make this problem to be more explicit, let's add WARN_ON_ONCE when new_pte is not NULL.
...
--- a/mm/mremap.c +++ b/mm/mremap.c @@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc, for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) {
WARN_ON_ONCE(!pte_none(*new_pte));
- if (pte_none(ptep_get(old_pte))) continue;
We now have no expectation that this will trigger, yes? It's a sanity check that patch [1/4] is working? Perhaps VM_WARN_ON_ONCE() would be more appropriate. And maybe even a comment:
/* temporary, remove this one day */
On 2025/5/30 3:19, Andrew Morton wrote:
On Thu, 29 May 2025 15:56:48 +0000 Pu Lehui pulehui@huaweicloud.com wrote:
From: Pu Lehui pulehui@huawei.com
When executing move_ptes, the new_pte must be NULL, otherwise it will be overwritten by the old_pte, and cause the abnormal new_pte to be leaked. In order to make this problem to be more explicit, let's add WARN_ON_ONCE when new_pte is not NULL.
...
--- a/mm/mremap.c +++ b/mm/mremap.c @@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc, for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) {
WARN_ON_ONCE(!pte_none(*new_pte));
- if (pte_none(ptep_get(old_pte))) continue;
We now have no expectation that this will trigger, yes? It's a sanity
Hi Andrew,
This can sanitize abnormal new_pte. It is expected that uprobe would not come in later, but others, uncertain🤔? So it will be a good alert. And after patch 1 it will not trigger WARNING.
check that patch [1/4] is working? Perhaps VM_WARN_ON_ONCE() would be
Agree, should I respin one more?
more appropriate. And maybe even a comment:
/* temporary, remove this one day */
On Fri, 30 May 2025 09:24:54 +0800 Pu Lehui pulehui@huaweicloud.com wrote:
check that patch [1/4] is working? Perhaps VM_WARN_ON_ONCE() would be
Agree, should I respin one more?
That's OK, I added this:
--- a/mm/mremap.c~mm-expose-abnormal-new_pte-during-move_ptes-fix +++ a/mm/mremap.c @@ -237,7 +237,7 @@ static int move_ptes(struct pagetable_mo
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) { - WARN_ON_ONCE(!pte_none(*new_pte)); + VM_WARN_ON_ONCE(!pte_none(*new_pte));
if (pte_none(ptep_get(old_pte))) continue; _
On Thu, May 29, 2025 at 03:56:48PM +0000, Pu Lehui wrote:
From: Pu Lehui pulehui@huawei.com
When executing move_ptes, the new_pte must be NULL, otherwise it will be overwritten by the old_pte, and cause the abnormal new_pte to be leaked. In order to make this problem to be more explicit, let's add WARN_ON_ONCE when new_pte is not NULL.
Suggested-by: Oleg Nesterov oleg@redhat.com Signed-off-by: Pu Lehui pulehui@huawei.com
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
(both this and the amended version :)
mm/mremap.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/mm/mremap.c b/mm/mremap.c index 83e359754961..4e2491f8c2ce 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) {
WARN_ON_ONCE(!pte_none(*new_pte));
I mean, we really really should not ever be seeing a mapped PTE here, so I think a WARN_ON_ONCE() is fine.
We unmap anything ahead of time, and only I think this uprobe breakpoint installation would ever cause this to be the case.
We can make this a VM_WARN_ON_ONCE() too I suppose, just in case there's something we're not thinking of, but I'd say at some point we'd want to change it to a WARN_ON_ONCE().
if (pte_none(ptep_get(old_pte))) continue;
-- 2.34.1
On 05/30, Lorenzo Stoakes wrote:
--- a/mm/mremap.c +++ b/mm/mremap.c @@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) {
WARN_ON_ONCE(!pte_none(*new_pte));
I mean, we really really should not ever be seeing a mapped PTE here, so I think a WARN_ON_ONCE() is fine.
We unmap anything ahead of time, and only I think this uprobe breakpoint installation would ever cause this to be the case.
We can make this a VM_WARN_ON_ONCE() too I suppose, just in case there's something we're not thinking of, but I'd say at some point we'd want to change it to a WARN_ON_ONCE().
Note also that move_normal_pmd/move_normal_pud use WARN_ON_ONCE(!xxx_none(...)), not VM_WARN_ON_ONCE().
Oleg.
From: Pu Lehui pulehui@huawei.com
Extract read_sysfs and write_sysfs into vm_util. Meanwhile, rename the function in thuge-gen that has the same name as read_sysfs.
Signed-off-by: Pu Lehui pulehui@huawei.com --- tools/testing/selftests/mm/ksm_tests.c | 32 ++-------------------- tools/testing/selftests/mm/thuge-gen.c | 6 ++-- tools/testing/selftests/mm/vm_util.c | 38 ++++++++++++++++++++++++++ tools/testing/selftests/mm/vm_util.h | 2 ++ 4 files changed, 45 insertions(+), 33 deletions(-)
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index dcdd5bb20f3d..e80deac1436b 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -58,40 +58,12 @@ int debug;
static int ksm_write_sysfs(const char *file_path, unsigned long val) { - FILE *f = fopen(file_path, "w"); - - if (!f) { - fprintf(stderr, "f %s\n", file_path); - perror("fopen"); - return 1; - } - if (fprintf(f, "%lu", val) < 0) { - perror("fprintf"); - fclose(f); - return 1; - } - fclose(f); - - return 0; + return write_sysfs(file_path, val); }
static int ksm_read_sysfs(const char *file_path, unsigned long *val) { - FILE *f = fopen(file_path, "r"); - - if (!f) { - fprintf(stderr, "f %s\n", file_path); - perror("fopen"); - return 1; - } - if (fscanf(f, "%lu", val) != 1) { - perror("fscanf"); - fclose(f); - return 1; - } - fclose(f); - - return 0; + return read_sysfs(file_path, val); }
static void ksm_print_sysfs(void) diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index a41bc1234b37..95b6f043a3cb 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,7 +77,7 @@ void show(unsigned long ps) system(buf); }
-unsigned long read_sysfs(int warn, char *fmt, ...) +unsigned long thuge_read_sysfs(int warn, char *fmt, ...) { char *line = NULL; size_t linelen = 0; @@ -106,7 +106,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
unsigned long read_free(unsigned long ps) { - return read_sysfs(ps != getpagesize(), + return thuge_read_sysfs(ps != getpagesize(), "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", ps >> 10); } @@ -195,7 +195,7 @@ void find_pagesizes(void) } globfree(&g);
- if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) + if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 1357e2d6a7b6..d899c272e0ee 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -486,3 +486,41 @@ int close_procmap(struct procmap_fd *procmap) { return close(procmap->fd); } + +int write_sysfs(const char *file_path, unsigned long val) +{ + FILE *f = fopen(file_path, "w"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fprintf(f, "%lu", val) < 0) { + perror("fprintf"); + fclose(f); + return 1; + } + fclose(f); + + return 0; +} + +int read_sysfs(const char *file_path, unsigned long *val) +{ + FILE *f = fopen(file_path, "r"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fscanf(f, "%lu", val) != 1) { + perror("fscanf"); + fclose(f); + return 1; + } + fclose(f); + + return 0; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 9211ba640d9c..f84c7c4680ea 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -87,6 +87,8 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out); int query_procmap(struct procmap_fd *procmap); bool find_vma_procmap(struct procmap_fd *procmap, void *address); int close_procmap(struct procmap_fd *procmap); +int write_sysfs(const char *file_path, unsigned long val); +int read_sysfs(const char *file_path, unsigned long *val);
static inline int open_self_procmap(struct procmap_fd *procmap_out) {
On Thu, May 29, 2025 at 03:56:49PM +0000, Pu Lehui wrote:
From: Pu Lehui pulehui@huawei.com
Extract read_sysfs and write_sysfs into vm_util. Meanwhile, rename the function in thuge-gen that has the same name as read_sysfs.
Nice!
Signed-off-by: Pu Lehui pulehui@huawei.com
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
tools/testing/selftests/mm/ksm_tests.c | 32 ++-------------------- tools/testing/selftests/mm/thuge-gen.c | 6 ++-- tools/testing/selftests/mm/vm_util.c | 38 ++++++++++++++++++++++++++ tools/testing/selftests/mm/vm_util.h | 2 ++ 4 files changed, 45 insertions(+), 33 deletions(-)
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index dcdd5bb20f3d..e80deac1436b 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -58,40 +58,12 @@ int debug;
static int ksm_write_sysfs(const char *file_path, unsigned long val) {
- FILE *f = fopen(file_path, "w");
- if (!f) {
fprintf(stderr, "f %s\n", file_path);
perror("fopen");
return 1;
- }
- if (fprintf(f, "%lu", val) < 0) {
perror("fprintf");
fclose(f);
return 1;
- }
- fclose(f);
- return 0;
- return write_sysfs(file_path, val);
}
static int ksm_read_sysfs(const char *file_path, unsigned long *val) {
- FILE *f = fopen(file_path, "r");
- if (!f) {
fprintf(stderr, "f %s\n", file_path);
perror("fopen");
return 1;
- }
- if (fscanf(f, "%lu", val) != 1) {
perror("fscanf");
fclose(f);
return 1;
- }
- fclose(f);
- return 0;
- return read_sysfs(file_path, val);
}
static void ksm_print_sysfs(void) diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index a41bc1234b37..95b6f043a3cb 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,7 +77,7 @@ void show(unsigned long ps) system(buf); }
-unsigned long read_sysfs(int warn, char *fmt, ...) +unsigned long thuge_read_sysfs(int warn, char *fmt, ...) {
I wonder if we could update these to use the newly shared functions?
Not a big deal though, perhaps a bit out of scope here, more of a nice-to-have.
char *line = NULL; size_t linelen = 0; @@ -106,7 +106,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
unsigned long read_free(unsigned long ps) {
- return read_sysfs(ps != getpagesize(),
- return thuge_read_sysfs(ps != getpagesize(), "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", ps >> 10);
} @@ -195,7 +195,7 @@ void find_pagesizes(void) } globfree(&g);
- if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
- if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 1357e2d6a7b6..d899c272e0ee 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -486,3 +486,41 @@ int close_procmap(struct procmap_fd *procmap) { return close(procmap->fd); }
+int write_sysfs(const char *file_path, unsigned long val) +{
- FILE *f = fopen(file_path, "w");
- if (!f) {
fprintf(stderr, "f %s\n", file_path);
perror("fopen");
return 1;
- }
- if (fprintf(f, "%lu", val) < 0) {
perror("fprintf");
fclose(f);
return 1;
- }
- fclose(f);
- return 0;
+}
+int read_sysfs(const char *file_path, unsigned long *val) +{
- FILE *f = fopen(file_path, "r");
- if (!f) {
fprintf(stderr, "f %s\n", file_path);
perror("fopen");
return 1;
- }
- if (fscanf(f, "%lu", val) != 1) {
perror("fscanf");
fclose(f);
return 1;
- }
- fclose(f);
- return 0;
+} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 9211ba640d9c..f84c7c4680ea 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -87,6 +87,8 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out); int query_procmap(struct procmap_fd *procmap); bool find_vma_procmap(struct procmap_fd *procmap, void *address); int close_procmap(struct procmap_fd *procmap); +int write_sysfs(const char *file_path, unsigned long val); +int read_sysfs(const char *file_path, unsigned long *val);
static inline int open_self_procmap(struct procmap_fd *procmap_out) { -- 2.34.1
On 2025/5/30 19:48, Lorenzo Stoakes wrote:
On Thu, May 29, 2025 at 03:56:49PM +0000, Pu Lehui wrote:
From: Pu Lehui pulehui@huawei.com
Extract read_sysfs and write_sysfs into vm_util. Meanwhile, rename the function in thuge-gen that has the same name as read_sysfs.
Nice!
Signed-off-by: Pu Lehui pulehui@huawei.com
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
tools/testing/selftests/mm/ksm_tests.c | 32 ++-------------------- tools/testing/selftests/mm/thuge-gen.c | 6 ++-- tools/testing/selftests/mm/vm_util.c | 38 ++++++++++++++++++++++++++ tools/testing/selftests/mm/vm_util.h | 2 ++ 4 files changed, 45 insertions(+), 33 deletions(-)
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index dcdd5bb20f3d..e80deac1436b 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -58,40 +58,12 @@ int debug;
static int ksm_write_sysfs(const char *file_path, unsigned long val) {
- FILE *f = fopen(file_path, "w");
- if (!f) {
fprintf(stderr, "f %s\n", file_path);
perror("fopen");
return 1;
- }
- if (fprintf(f, "%lu", val) < 0) {
perror("fprintf");
fclose(f);
return 1;
- }
- fclose(f);
- return 0;
return write_sysfs(file_path, val); }
static int ksm_read_sysfs(const char *file_path, unsigned long *val) {
- FILE *f = fopen(file_path, "r");
- if (!f) {
fprintf(stderr, "f %s\n", file_path);
perror("fopen");
return 1;
- }
- if (fscanf(f, "%lu", val) != 1) {
perror("fscanf");
fclose(f);
return 1;
- }
- fclose(f);
- return 0;
return read_sysfs(file_path, val); }
static void ksm_print_sysfs(void)
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index a41bc1234b37..95b6f043a3cb 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,7 +77,7 @@ void show(unsigned long ps) system(buf); }
-unsigned long read_sysfs(int warn, char *fmt, ...) +unsigned long thuge_read_sysfs(int warn, char *fmt, ...) {
I wonder if we could update these to use the newly shared functions?
Not a big deal though, perhaps a bit out of scope here, more of a nice-to-have.
Hi Lorenzo, Andrew,
Yep, we can do it more. But, actually, I am not sure about the merge process of mm module. Do I need to resend the whole series or just the diff below?
From 13ecf9d66a0068d520be955e3ca8d9c0dc9d8393 Mon Sep 17 00:00:00 2001 From: Pu Lehui pulehui@huawei.com Date: Tue, 3 Jun 2025 06:50:43 +0000 Subject: [PATCH] selftests/mm: Use generic read_sysfs in thuge-gen test
Use generic read_sysfs in thuge-gen test.
Signed-off-by: Pu Lehui pulehui@huawei.com --- tools/testing/selftests/mm/thuge-gen.c | 37 +++++++------------------- 1 file changed, 9 insertions(+), 28 deletions(-)
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 95b6f043a3cb..5a32ed0ba26c 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,40 +77,19 @@ void show(unsigned long ps) system(buf); }
-unsigned long thuge_read_sysfs(int warn, char *fmt, ...) +unsigned long read_free(unsigned long ps) { - char *line = NULL; - size_t linelen = 0; - char buf[100]; - FILE *f; - va_list ap; unsigned long val = 0; + char buf[100];
- va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - va_end(ap); + snprintf(buf, sizeof buf, + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + read_sysfs(buf, &val);
- f = fopen(buf, "r"); - if (!f) { - if (warn) - ksft_print_msg("missing %s\n", buf); - return 0; - } - if (getline(&line, &linelen, f) > 0) { - sscanf(line, "%lu", &val); - } - fclose(f); - free(line); return val; }
-unsigned long read_free(unsigned long ps) -{ - return thuge_read_sysfs(ps != getpagesize(), - "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", - ps >> 10); -} - void test_mmap(unsigned long size, unsigned flags) { char *map; @@ -173,6 +152,7 @@ void test_shmget(unsigned long size, unsigned flags) void find_pagesizes(void) { unsigned long largest = getpagesize(); + unsigned long shmmax_val = 0; int i; glob_t g;
@@ -195,7 +175,8 @@ void find_pagesizes(void) } globfree(&g);
- if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) + read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val); + if (shmmax_val < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
On Tue, 3 Jun 2025 15:17:49 +0800 Pu Lehui pulehui@huaweicloud.com wrote:
Not a big deal though, perhaps a bit out of scope here, more of a nice-to-have.
...
Yep, we can do it more. But, actually, I am not sure about the merge process of mm module. Do I need to resend the whole series or just the diff below?
A little diff like this is great, although this one didn't apply for me.
But if we're to get this series into 6.16-rc1, now is not the time to be changing it. Please send out a formal patch after -rc1?
From: Pu Lehui pulehui@huawei.com
Add test about uprobe pte be orphan during vma merge.
Signed-off-by: Pu Lehui pulehui@huawei.com --- tools/testing/selftests/mm/merge.c | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+)
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index c76646cdf6e6..8e1f38d23384 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -2,11 +2,13 @@
#define _GNU_SOURCE #include "../kselftest_harness.h" +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> #include <sys/wait.h> +#include <linux/perf_event.h> #include "vm_util.h"
FIXTURE(merge) @@ -452,4 +454,44 @@ TEST_F(merge, forked_source_vma) ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); }
+TEST_F(merge, handle_uprobe_upon_merged_vma) +{ + const size_t attr_sz = sizeof(struct perf_event_attr); + unsigned int page_size = self->page_size; + const char *probe_file = "./foo"; + char *carveout = self->carveout; + struct perf_event_attr attr; + unsigned long type; + void *ptr1, *ptr2; + int fd; + + fd = open(probe_file, O_RDWR|O_CREAT, 0600); + ASSERT_GE(fd, 0); + + ASSERT_EQ(ftruncate(fd, page_size), 0); + ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0); + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; + attr.type = type; + attr.config1 = (__u64)(long)probe_file; + attr.config2 = 0x0; + + ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0); + + ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC, + MAP_PRIVATE | MAP_FIXED, fd, 0); + ASSERT_NE(ptr1, MAP_FAILED); + + ptr2 = mremap(ptr1, page_size, 2 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_NE(mremap(ptr2, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); + + close(fd); + remove(probe_file); +} + TEST_HARNESS_MAIN
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html#opti...
Rule: add the tag "Cc: stable@vger.kernel.org" in the sign-off area to have the patch automatically included in the stable tree. Subject: [PATCH v1 4/4] selftests/mm: Add test about uprobe pte be orphan during vma merge Link: https://lore.kernel.org/stable/20250529155650.4017699-5-pulehui%40huaweiclou...
On Thu, May 29, 2025 at 03:56:50PM +0000, Pu Lehui wrote:
From: Pu Lehui pulehui@huawei.com
Add test about uprobe pte be orphan during vma merge.
Signed-off-by: Pu Lehui pulehui@huawei.com
tools/testing/selftests/mm/merge.c | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+)
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index c76646cdf6e6..8e1f38d23384 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -2,11 +2,13 @@
#define _GNU_SOURCE #include "../kselftest_harness.h" +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> #include <sys/wait.h> +#include <linux/perf_event.h> #include "vm_util.h"
Need to include sys/syscall.h...
FIXTURE(merge) @@ -452,4 +454,44 @@ TEST_F(merge, forked_source_vma) ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); }
+TEST_F(merge, handle_uprobe_upon_merged_vma) +{
- const size_t attr_sz = sizeof(struct perf_event_attr);
- unsigned int page_size = self->page_size;
- const char *probe_file = "./foo";
- char *carveout = self->carveout;
- struct perf_event_attr attr;
- unsigned long type;
- void *ptr1, *ptr2;
- int fd;
- fd = open(probe_file, O_RDWR|O_CREAT, 0600);
- ASSERT_GE(fd, 0);
- ASSERT_EQ(ftruncate(fd, page_size), 0);
- ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0);
- memset(&attr, 0, attr_sz);
- attr.size = attr_sz;
- attr.type = type;
- attr.config1 = (__u64)(long)probe_file;
- attr.config2 = 0x0;
- ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
...Because this results in:
In file included from merge.c:4: merge.c: In function ‘merge_handle_uprobe_upon_merged_vma’: merge.c:480:27: error: ‘__NR_perf_event_open’ undeclared (first use in this function) 480 | ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
Otherwise :>)
- ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
MAP_PRIVATE | MAP_FIXED, fd, 0);
- ASSERT_NE(ptr1, MAP_FAILED);
- ptr2 = mremap(ptr1, page_size, 2 * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size);
- ASSERT_NE(ptr2, MAP_FAILED);
- ASSERT_NE(mremap(ptr2, page_size, page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
- close(fd);
- remove(probe_file);
+}
TEST_HARNESS_MAIN
2.34.1
On 2025/5/30 19:32, Lorenzo Stoakes wrote:
On Thu, May 29, 2025 at 03:56:50PM +0000, Pu Lehui wrote:
From: Pu Lehui pulehui@huawei.com
Add test about uprobe pte be orphan during vma merge.
Signed-off-by: Pu Lehui pulehui@huawei.com
tools/testing/selftests/mm/merge.c | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+)
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index c76646cdf6e6..8e1f38d23384 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -2,11 +2,13 @@
#define _GNU_SOURCE #include "../kselftest_harness.h" +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> #include <sys/wait.h> +#include <linux/perf_event.h> #include "vm_util.h"
Need to include sys/syscall.h...
FIXTURE(merge) @@ -452,4 +454,44 @@ TEST_F(merge, forked_source_vma) ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); }
+TEST_F(merge, handle_uprobe_upon_merged_vma) +{
- const size_t attr_sz = sizeof(struct perf_event_attr);
- unsigned int page_size = self->page_size;
- const char *probe_file = "./foo";
- char *carveout = self->carveout;
- struct perf_event_attr attr;
- unsigned long type;
- void *ptr1, *ptr2;
- int fd;
- fd = open(probe_file, O_RDWR|O_CREAT, 0600);
- ASSERT_GE(fd, 0);
- ASSERT_EQ(ftruncate(fd, page_size), 0);
- ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0);
- memset(&attr, 0, attr_sz);
- attr.size = attr_sz;
- attr.type = type;
- attr.config1 = (__u64)(long)probe_file;
- attr.config2 = 0x0;
- ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
...Because this results in:
In file included from merge.c:4: merge.c: In function ‘merge_handle_uprobe_upon_merged_vma’: merge.c:480:27: error: ‘__NR_perf_event_open’ undeclared (first use in this function) 480 | ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
I did not encounter this problem when compiling in the tools/testing/selftests/mm directory, but in any case, adding the sys/syscall.h header file makes sense.
Otherwise :>)
- ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
MAP_PRIVATE | MAP_FIXED, fd, 0);
- ASSERT_NE(ptr1, MAP_FAILED);
- ptr2 = mremap(ptr1, page_size, 2 * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size);
- ASSERT_NE(ptr2, MAP_FAILED);
- ASSERT_NE(mremap(ptr2, page_size, page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
- close(fd);
- remove(probe_file);
+}
- TEST_HARNESS_MAIN
-- 2.34.1
On Tue, Jun 03, 2025 at 03:08:22PM +0800, Pu Lehui wrote:
On 2025/5/30 19:32, Lorenzo Stoakes wrote:
On Thu, May 29, 2025 at 03:56:50PM +0000, Pu Lehui wrote:
From: Pu Lehui pulehui@huawei.com
Add test about uprobe pte be orphan during vma merge.
Signed-off-by: Pu Lehui pulehui@huawei.com
tools/testing/selftests/mm/merge.c | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+)
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index c76646cdf6e6..8e1f38d23384 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -2,11 +2,13 @@
#define _GNU_SOURCE #include "../kselftest_harness.h" +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> #include <sys/wait.h> +#include <linux/perf_event.h> #include "vm_util.h"
Need to include sys/syscall.h...
FIXTURE(merge) @@ -452,4 +454,44 @@ TEST_F(merge, forked_source_vma) ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); }
+TEST_F(merge, handle_uprobe_upon_merged_vma) +{
- const size_t attr_sz = sizeof(struct perf_event_attr);
- unsigned int page_size = self->page_size;
- const char *probe_file = "./foo";
- char *carveout = self->carveout;
- struct perf_event_attr attr;
- unsigned long type;
- void *ptr1, *ptr2;
- int fd;
- fd = open(probe_file, O_RDWR|O_CREAT, 0600);
- ASSERT_GE(fd, 0);
- ASSERT_EQ(ftruncate(fd, page_size), 0);
- ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0);
- memset(&attr, 0, attr_sz);
- attr.size = attr_sz;
- attr.type = type;
- attr.config1 = (__u64)(long)probe_file;
- attr.config2 = 0x0;
- ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
...Because this results in:
In file included from merge.c:4: merge.c: In function ‘merge_handle_uprobe_upon_merged_vma’: merge.c:480:27: error: ‘__NR_perf_event_open’ undeclared (first use in this function) 480 | ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
I did not encounter this problem when compiling in the tools/testing/selftests/mm directory, but in any case, adding the sys/syscall.h header file makes sense.
Weird, it can depend on what system headers are implicitly included due to a header in the dependency chain including something else. At any rate, I think Andrew has already updated this?
If you send a respin obviously do include this fix.
Otherwise :>)
- ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
MAP_PRIVATE | MAP_FIXED, fd, 0);
- ASSERT_NE(ptr1, MAP_FAILED);
- ptr2 = mremap(ptr1, page_size, 2 * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size);
- ASSERT_NE(ptr2, MAP_FAILED);
- ASSERT_NE(mremap(ptr2, page_size, page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
- close(fd);
- remove(probe_file);
+}
- TEST_HARNESS_MAIN
-- 2.34.1
linux-stable-mirror@lists.linaro.org