[...]
Thanks for giving my suggestions a churn. I think we can further improve/simplify some things. I added some comments, but might have more regarding MMF_VM_MERGE_ANY / MMF_VM_MERGEABLE.
[I'll try reworking your patch after I send this mail to play with some simplifications]
 arch/s390/mm/gmap.c            |   1 +
 include/linux/ksm.h            |  23 +++++--
 include/linux/sched/coredump.h |   1 +
 include/uapi/linux/prctl.h     |   2 +
 kernel/fork.c                  |   1 +
 kernel/sys.c                   |  23 +++++++
 mm/ksm.c                       | 111 ++++++++++++++++++++++++++-------
 mm/mmap.c                      |   7 +++
 8 files changed, 142 insertions(+), 27 deletions(-)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 5a716bdcba05..9d85e5589474 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2591,6 +2591,7 @@ int gmap_mark_unmergeable(void)
 	int ret;
 	VMA_ITERATOR(vmi, mm, 0);
 
+	clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
Okay, that should keep the existing mechanism working (but users can still mess it up). Might be worth a comment:
/*
 * Make sure to disable KSM (if enabled for the whole process or
 * individual VMAs). Note that nothing currently hinders user space
 * from re-enabling it.
 */
 	for_each_vma(vmi, vma) {
 		/* Copy vm_flags to avoid partial modifications in ksm_madvise */
 		vm_flags = vma->vm_flags;

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 7e232ba59b86..f24f9faf1561 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -18,20 +18,29 @@
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
-int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm);
+int ksm_add_mm(struct mm_struct *mm);
+void ksm_add_vma(struct vm_area_struct *vma);
+void ksm_add_vmas(struct mm_struct *mm);
+int __ksm_enter(struct mm_struct *mm, int flag);
+void __ksm_exit(struct mm_struct *mm, int flag);
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
+	if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
+		return ksm_add_mm(mm);
ksm_fork() runs before copying any VMAs. Copying the bit should be sufficient.
Would it be possible to rework to something like:
if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
	set_bit(MMF_VM_MERGE_ANY, &mm->flags);
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
	return __ksm_enter(mm);
work? IOW, not exporting ksm_add_mm() and not passing a flag to __ksm_enter() -- it would simply set MMF_VM_MERGEABLE ?
I remember proposing that enabling MMF_VM_MERGE_ANY would simply enable MMF_VM_MERGEABLE.
 	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
-		return __ksm_enter(mm);
+		return __ksm_enter(mm, MMF_VM_MERGEABLE);
 	return 0;
 }
 static inline void ksm_exit(struct mm_struct *mm)
 {
-	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm);
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_exit(mm, MMF_VM_MERGE_ANY);
+	else if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		__ksm_exit(mm, MMF_VM_MERGEABLE);
Can we do
if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
	__ksm_exit(mm);
And simply let __ksm_exit() clear both bits?
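A rough sketch of what that could look like (untested; it assumes the rework above, where MMF_VM_MERGEABLE is always set whenever MMF_VM_MERGE_ANY is):

static inline void ksm_exit(struct mm_struct *mm)
{
	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
		__ksm_exit(mm);
}

with __ksm_exit() dropping the "flag" parameter and unconditionally doing:

	/* Clear both flags; MMF_VM_MERGE_ANY implies MMF_VM_MERGEABLE. */
	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
	clear_bit(MMF_VM_MERGE_ANY, &mm->flags);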
 }
 
 /*
@@ -53,6 +62,10 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
 #else /* !CONFIG_KSM */
[...]
 #endif /* _LINUX_SCHED_COREDUMP_H */

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 1312a137f7fb..759b3f53e53f 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -290,4 +290,6 @@ struct prctl_mm_map {
 #define PR_SET_VMA		0x53564d41
 # define PR_SET_VMA_ANON_NAME		0
 
+#define PR_SET_MEMORY_MERGE		67
+#define PR_GET_MEMORY_MERGE		68
 #endif /* _LINUX_PRCTL_H */

diff --git a/kernel/fork.c b/kernel/fork.c
index f68954d05e89..1520697cf6c7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -686,6 +686,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		if (vma_iter_bulk_store(&vmi, tmp))
 			goto fail_nomem_vmi_store;
 
+		ksm_add_vma(tmp);
Is this really required? The relevant VMAs should have VM_MERGEABLE set.
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(tmp, mpnt);

diff --git a/kernel/sys.c b/kernel/sys.c
index 495cd87d9bf4..9bba163d2d04 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -15,6 +15,7 @@
 #include <linux/highuid.h>
 #include <linux/fs.h>
 #include <linux/kmod.h>
+#include <linux/ksm.h>
 #include <linux/perf_event.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
@@ -2661,6 +2662,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SET_VMA:
 		error = prctl_set_vma(arg2, arg3, arg4, arg5);
 		break;
+#ifdef CONFIG_KSM
+	case PR_SET_MEMORY_MERGE:
+		if (mmap_write_lock_killable(me->mm))
+			return -EINTR;
+
+		if (arg2) {
+			int err = ksm_add_mm(me->mm);
+
+			if (!err)
+				ksm_add_vmas(me->mm);
+		} else {
+			clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
Okay, so disabling doesn't actually unshare anything.
+		}
+		mmap_write_unlock(me->mm);
+		break;
+	case PR_GET_MEMORY_MERGE:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+
+		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
+		break;
+#endif
 	default:
 		error = -EINVAL;
 		break;

diff --git a/mm/ksm.c b/mm/ksm.c
index d7bd28199f6c..ab95ae0f9def 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -534,10 +534,33 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
 	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
 }
 
+static bool vma_ksm_compatible(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SHARED  | VM_MAYSHARE   | VM_PFNMAP  |
+			     VM_IO      | VM_DONTEXPAND | VM_HUGETLB |
+			     VM_MIXEDMAP))
+		return false;		/* just ignore the advice */
+
+	if (vma_is_dax(vma))
+		return false;
+
+#ifdef VM_SAO
+	if (vma->vm_flags & VM_SAO)
+		return false;
+#endif
+#ifdef VM_SPARC_ADI
+	if (vma->vm_flags & VM_SPARC_ADI)
+		return false;
+#endif
+
+	return true;
+}
+
 static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
 		unsigned long addr)
 {
 	struct vm_area_struct *vma;
unrelated change
 	if (ksm_test_exit(mm))
 		return NULL;
 	vma = vma_lookup(mm, addr);
@@ -1065,6 +1088,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 
 			mm_slot_free(mm_slot_cache, mm_slot);
 			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 			mmdrop(mm);
 		} else
 			spin_unlock(&ksm_mmlist_lock);
@@ -2495,6 +2519,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
 
 		mm_slot_free(mm_slot_cache, mm_slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 		mmap_read_unlock(mm);
 		mmdrop(mm);
 	} else {
@@ -2571,6 +2596,63 @@ static int ksm_scan_thread(void *nothing)
 	return 0;
 }
 
+static void __ksm_add_vma(struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	if (vm_flags & VM_MERGEABLE)
+		return;
+
+	if (vma_ksm_compatible(vma)) {
+		vm_flags |= VM_MERGEABLE;
+		vm_flags_reset(vma, vm_flags);
+	}
+}
+/**
+ * ksm_add_vma - Mark vma as mergeable
"if compatible"
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_vmas - Mark all vma's of a process as mergeable
+ * @mm:  Pointer to mm
+ */
+void ksm_add_vmas(struct mm_struct *mm)
I'd suggest calling this
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	for_each_vma(vmi, vma)
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_mm - Add mm to mm ksm list
+ * @mm:  Pointer to mm
+ *
+ * Returns 0 on success, otherwise error code
+ */
+int ksm_add_mm(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		return -EINVAL;
+	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		return -EINVAL;
+
+	return __ksm_enter(mm, MMF_VM_MERGE_ANY);
+}
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
 {
@@ -2579,28 +2661,13 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	switch (advice) {
 	case MADV_MERGEABLE:
-		/*
-		 * Be somewhat over-protective for now!
-		 */
-		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
-				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
-				 VM_HUGETLB | VM_MIXEDMAP))
-			return 0;		/* just ignore the advice */
-
-		if (vma_is_dax(vma))
+		if (vma->vm_flags & VM_MERGEABLE)
 			return 0;
-
-#ifdef VM_SAO
-		if (*vm_flags & VM_SAO)
+		if (!vma_ksm_compatible(vma))
 			return 0;
-#endif
-#ifdef VM_SPARC_ADI
-		if (*vm_flags & VM_SPARC_ADI)
-			return 0;
-#endif
 
 		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
-			err = __ksm_enter(mm);
+			err = __ksm_enter(mm, MMF_VM_MERGEABLE);
 			if (err)
 				return err;
 		}
@@ -2626,7 +2693,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 }
 EXPORT_SYMBOL_GPL(ksm_madvise);
 
-int __ksm_enter(struct mm_struct *mm)
+int __ksm_enter(struct mm_struct *mm, int flag)
 {
 	struct ksm_mm_slot *mm_slot;
 	struct mm_slot *slot;
@@ -2659,7 +2726,7 @@ int __ksm_enter(struct mm_struct *mm)
 	list_add_tail(&slot->mm_node, &ksm_scan.mm_slot->slot.mm_node);
 	spin_unlock(&ksm_mmlist_lock);
 
-	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+	set_bit(flag, &mm->flags);
 	mmgrab(mm);
 
 	if (needs_wakeup)
@@ -2668,7 +2735,7 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm)
+void __ksm_exit(struct mm_struct *mm, int flag)
 {
 	struct ksm_mm_slot *mm_slot;
 	struct mm_slot *slot;
@@ -2700,7 +2767,7 @@ void __ksm_exit(struct mm_struct *mm)
 	if (easy_to_free) {
 		mm_slot_free(mm_slot_cache, mm_slot);
-		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(flag, &mm->flags);
 		mmdrop(mm);
 	} else if (mm_slot) {
 		mmap_write_lock(mm);
diff --git a/mm/mmap.c b/mm/mmap.c
index 740b54be3ed4..483e182e0b9d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,7 @@
 #include <linux/pkeys.h>
 #include <linux/oom.h>
 #include <linux/sched/mm.h>
+#include <linux/ksm.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2213,6 +2214,8 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	/* vma_complete stores the new vma */
 	vma_complete(&vp, vmi, vma->vm_mm);
 
+	ksm_add_vma(new);
Splitting a VMA shouldn't modify VM_MERGEABLE, so I assume this is not required?
 	/* Success. */
 	if (new_below)
 		vma_next(vmi);
@@ -2664,6 +2667,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (file && vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
+	ksm_add_vma(vma);
 expanded:
 	perf_event_mmap(vma);
@@ -2936,6 +2940,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		goto mas_store_fail;
 
 	mm->map_count++;
+	ksm_add_vma(vma);
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -3180,6 +3185,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		if (vma_link(mm, new_vma))
 			goto out_vma_link;
 		*need_rmap_locks = false;
+		ksm_add_vma(new_vma);
Copying shouldn't modify VM_MERGEABLE, so I think this is not required?
 	}
 	validate_mm_mt(mm);
 	return new_vma;
@@ -3356,6 +3362,7 @@ static struct vm_area_struct *__install_special_mapping(
 	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
 
 	perf_event_mmap(vma);
+	ksm_add_vma(vma);
IIUC, special mappings will never be considered a reasonable target for KSM (especially, because at least VM_DONTEXPAND is always set).
I think you can just drop this call.
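For reference, a minimal user-space sketch of how the interface under discussion would be used (illustration only, not part of the patch; it assumes the PR_SET_MEMORY_MERGE/PR_GET_MEMORY_MERGE values defined above and a kernel built with CONFIG_KSM):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_MEMORY_MERGE
#define PR_SET_MEMORY_MERGE	67
#define PR_GET_MEMORY_MERGE	68
#endif

int main(void)
{
	/* Opt the whole process in to KSM merging. */
	if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0))
		perror("PR_SET_MEMORY_MERGE");

	/* Returns 1 if MMF_VM_MERGE_ANY is set, 0 otherwise. */
	printf("merge-any: %d\n", prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));

	/* Clears the flag again; as noted above, this does not unshare. */
	prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
	return 0;
}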
David Hildenbrand david@redhat.com writes:
[...]
Thanks for giving my suggestions a churn. I think we can further improve/simplify some things. I added some comments, but might have more regarding MMF_VM_MERGE_ANY / MMF_VM_MERGEABLE.
[I'll try reworking your patch after I send this mail to play with some simplifications]
 arch/s390/mm/gmap.c            |   1 +
 include/linux/ksm.h            |  23 +++++--
 include/linux/sched/coredump.h |   1 +
 include/uapi/linux/prctl.h     |   2 +
 kernel/fork.c                  |   1 +
 kernel/sys.c                   |  23 +++++++
 mm/ksm.c                       | 111 ++++++++++++++++++++++++++-------
 mm/mmap.c                      |   7 +++
 8 files changed, 142 insertions(+), 27 deletions(-)

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 5a716bdcba05..9d85e5589474 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2591,6 +2591,7 @@ int gmap_mark_unmergeable(void)
 	int ret;
 	VMA_ITERATOR(vmi, mm, 0);
 
+	clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
Okay, that should keep the existing mechanism working (but users can still mess it up). Might be worth a comment:
/*
 * Make sure to disable KSM (if enabled for the whole process or
 * individual VMAs). Note that nothing currently hinders user space
 * from re-enabling it.
 */
I'll add the comment.
 	for_each_vma(vmi, vma) {
 		/* Copy vm_flags to avoid partial modifications in ksm_madvise */
 		vm_flags = vma->vm_flags;

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 7e232ba59b86..f24f9faf1561 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -18,20 +18,29 @@
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
-int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm);
+int ksm_add_mm(struct mm_struct *mm);
+void ksm_add_vma(struct vm_area_struct *vma);
+void ksm_add_vmas(struct mm_struct *mm);
+int __ksm_enter(struct mm_struct *mm, int flag);
+void __ksm_exit(struct mm_struct *mm, int flag);
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
+	if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
+		return ksm_add_mm(mm);
ksm_fork() runs before copying any VMAs. Copying the bit should be sufficient.
Would it be possible to rework to something like:
if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
	set_bit(MMF_VM_MERGE_ANY, &mm->flags);
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
	return __ksm_enter(mm);
That will work.
work? IOW, not exporting ksm_add_mm() and not passing a flag to __ksm_enter() -- it would simply set MMF_VM_MERGEABLE ?
ksm_add_mm() is also used in prctl (kernel/sys.c). Do you want to make a similar change there?
I remember proposing that enabling MMF_VM_MERGE_ANY would simply enable MMF_VM_MERGEABLE.
 	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
-		return __ksm_enter(mm);
+		return __ksm_enter(mm, MMF_VM_MERGEABLE);
 	return 0;
 }
 
 static inline void ksm_exit(struct mm_struct *mm)
 {
-	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm);
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_exit(mm, MMF_VM_MERGE_ANY);
+	else if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		__ksm_exit(mm, MMF_VM_MERGEABLE);
Can we do
if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
	__ksm_exit(mm);
And simply let __ksm_exit() clear both bits?
Yes, I'll make the change.
 }
 
 /*
@@ -53,6 +62,10 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
 #else /* !CONFIG_KSM */
[...]
 #endif /* _LINUX_SCHED_COREDUMP_H */

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 1312a137f7fb..759b3f53e53f 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -290,4 +290,6 @@ struct prctl_mm_map {
 #define PR_SET_VMA		0x53564d41
 # define PR_SET_VMA_ANON_NAME		0
 
+#define PR_SET_MEMORY_MERGE		67
+#define PR_GET_MEMORY_MERGE		68
 #endif /* _LINUX_PRCTL_H */

diff --git a/kernel/fork.c b/kernel/fork.c
index f68954d05e89..1520697cf6c7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -686,6 +686,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		if (vma_iter_bulk_store(&vmi, tmp))
 			goto fail_nomem_vmi_store;
 
+		ksm_add_vma(tmp);
Is this really required? The relevant VMAs should have VM_MERGEABLE set.
I'll fix it.
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(tmp, mpnt);

diff --git a/kernel/sys.c b/kernel/sys.c
index 495cd87d9bf4..9bba163d2d04 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -15,6 +15,7 @@
 #include <linux/highuid.h>
 #include <linux/fs.h>
 #include <linux/kmod.h>
+#include <linux/ksm.h>
 #include <linux/perf_event.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
@@ -2661,6 +2662,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SET_VMA:
 		error = prctl_set_vma(arg2, arg3, arg4, arg5);
 		break;
+#ifdef CONFIG_KSM
+	case PR_SET_MEMORY_MERGE:
+		if (mmap_write_lock_killable(me->mm))
+			return -EINTR;
+
+		if (arg2) {
+			int err = ksm_add_mm(me->mm);
+
+			if (!err)
+				ksm_add_vmas(me->mm);
+		} else {
+			clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
Okay, so disabling doesn't actually unshare anything.
+		}
+		mmap_write_unlock(me->mm);
+		break;
+	case PR_GET_MEMORY_MERGE:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+
+		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
+		break;
+#endif
 	default:
 		error = -EINVAL;
 		break;

diff --git a/mm/ksm.c b/mm/ksm.c
index d7bd28199f6c..ab95ae0f9def 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -534,10 +534,33 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
 	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
 }
 
+static bool vma_ksm_compatible(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SHARED  | VM_MAYSHARE   | VM_PFNMAP  |
+			     VM_IO      | VM_DONTEXPAND | VM_HUGETLB |
+			     VM_MIXEDMAP))
+		return false;		/* just ignore the advice */
+
+	if (vma_is_dax(vma))
+		return false;
+
+#ifdef VM_SAO
+	if (vma->vm_flags & VM_SAO)
+		return false;
+#endif
+#ifdef VM_SPARC_ADI
+	if (vma->vm_flags & VM_SPARC_ADI)
+		return false;
+#endif
+
+	return true;
+}
+
 static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
 		unsigned long addr)
 {
 	struct vm_area_struct *vma;
unrelated change
Removed.
 	if (ksm_test_exit(mm))
 		return NULL;
 	vma = vma_lookup(mm, addr);
@@ -1065,6 +1088,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 
 			mm_slot_free(mm_slot_cache, mm_slot);
 			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 			mmdrop(mm);
 		} else
 			spin_unlock(&ksm_mmlist_lock);
@@ -2495,6 +2519,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
 
 		mm_slot_free(mm_slot_cache, mm_slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 		mmap_read_unlock(mm);
 		mmdrop(mm);
 	} else {
@@ -2571,6 +2596,63 @@ static int ksm_scan_thread(void *nothing)
 	return 0;
 }
 
+static void __ksm_add_vma(struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	if (vm_flags & VM_MERGEABLE)
+		return;
+
+	if (vma_ksm_compatible(vma)) {
+		vm_flags |= VM_MERGEABLE;
+		vm_flags_reset(vma, vm_flags);
+	}
+}
+/**
+ * ksm_add_vma - Mark vma as mergeable
"if compatible"
I'll add the above.
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_vmas - Mark all vma's of a process as mergeable
+ * @mm:  Pointer to mm
+ */
+void ksm_add_vmas(struct mm_struct *mm)
I'd suggest calling this
I guess you forgot your name suggestion?
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	for_each_vma(vmi, vma)
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_mm - Add mm to mm ksm list
+ * @mm:  Pointer to mm
+ *
+ * Returns 0 on success, otherwise error code
+ */
+int ksm_add_mm(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		return -EINVAL;
+	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		return -EINVAL;
+
+	return __ksm_enter(mm, MMF_VM_MERGE_ANY);
+}
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
 {
@@ -2579,28 +2661,13 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	switch (advice) {
 	case MADV_MERGEABLE:
-		/*
-		 * Be somewhat over-protective for now!
-		 */
-		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
-				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
-				 VM_HUGETLB | VM_MIXEDMAP))
-			return 0;		/* just ignore the advice */
-
-		if (vma_is_dax(vma))
+		if (vma->vm_flags & VM_MERGEABLE)
 			return 0;
-
-#ifdef VM_SAO
-		if (*vm_flags & VM_SAO)
+		if (!vma_ksm_compatible(vma))
 			return 0;
-#endif
-#ifdef VM_SPARC_ADI
-		if (*vm_flags & VM_SPARC_ADI)
-			return 0;
-#endif
 
 		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
-			err = __ksm_enter(mm);
+			err = __ksm_enter(mm, MMF_VM_MERGEABLE);
 			if (err)
 				return err;
 		}
@@ -2626,7 +2693,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 }
 EXPORT_SYMBOL_GPL(ksm_madvise);
 
-int __ksm_enter(struct mm_struct *mm)
+int __ksm_enter(struct mm_struct *mm, int flag)
 {
 	struct ksm_mm_slot *mm_slot;
 	struct mm_slot *slot;
@@ -2659,7 +2726,7 @@ int __ksm_enter(struct mm_struct *mm)
 	list_add_tail(&slot->mm_node, &ksm_scan.mm_slot->slot.mm_node);
 	spin_unlock(&ksm_mmlist_lock);
 
-	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+	set_bit(flag, &mm->flags);
 	mmgrab(mm);
 
 	if (needs_wakeup)
@@ -2668,7 +2735,7 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm)
+void __ksm_exit(struct mm_struct *mm, int flag)
 {
 	struct ksm_mm_slot *mm_slot;
 	struct mm_slot *slot;
@@ -2700,7 +2767,7 @@ void __ksm_exit(struct mm_struct *mm)
 	if (easy_to_free) {
 		mm_slot_free(mm_slot_cache, mm_slot);
-		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(flag, &mm->flags);
 		mmdrop(mm);
 	} else if (mm_slot) {
 		mmap_write_lock(mm);
diff --git a/mm/mmap.c b/mm/mmap.c
index 740b54be3ed4..483e182e0b9d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,7 @@
 #include <linux/pkeys.h>
 #include <linux/oom.h>
 #include <linux/sched/mm.h>
+#include <linux/ksm.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2213,6 +2214,8 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	/* vma_complete stores the new vma */
 	vma_complete(&vp, vmi, vma->vm_mm);
 
+	ksm_add_vma(new);
Splitting a VMA shouldn't modify VM_MERGEABLE, so I assume this is not required?
I'll fix it.
 	/* Success. */
 	if (new_below)
 		vma_next(vmi);
@@ -2664,6 +2667,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (file && vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
+	ksm_add_vma(vma);
 expanded:
 	perf_event_mmap(vma);
@@ -2936,6 +2940,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		goto mas_store_fail;
 
 	mm->map_count++;
+	ksm_add_vma(vma);
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -3180,6 +3185,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		if (vma_link(mm, new_vma))
 			goto out_vma_link;
 		*need_rmap_locks = false;
+		ksm_add_vma(new_vma);
Copying shouldn't modify VM_MERGEABLE, so I think this is not required?
I'll fix it.
 	}
 	validate_mm_mt(mm);
 	return new_vma;
@@ -3356,6 +3362,7 @@ static struct vm_area_struct *__install_special_mapping(
 	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
 
 	perf_event_mmap(vma);
+	ksm_add_vma(vma);
IIUC, special mappings will never be considered a reasonable target for KSM (especially, because at least VM_DONTEXPAND is always set).
I think you can just drop this call.
I dropped it.
[...]
That will work.
work? IOW, not exporting ksm_add_mm() and not passing a flag to __ksm_enter() -- it would simply set MMF_VM_MERGEABLE ?
ksm_add_mm() is also used in prctl (kernel/sys.c). Do you want to make a similar change there?
Yes.
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_vmas - Mark all vma's of a process as mergeable
+ * @mm:  Pointer to mm
+ */
+void ksm_add_vmas(struct mm_struct *mm)
I'd suggest calling this
I guess you forgot your name suggestion?
Yeah, I reconsidered because the first idea I had was not particularly good. Maybe
ksm_enable_for_all_vmas()
But not so sure. If you think the "add" terminology is a good fit, keep it like that.
Thanks for bearing with me :)
On 12.04.23 20:41, David Hildenbrand wrote:
[...]
That will work.
work? IOW, not exporting ksm_add_mm() and not passing a flag to __ksm_enter() -- it would simply set MMF_VM_MERGEABLE ?
ksm_add_mm() is also used in prctl (kernel/sys.c). Do you want to make a similar change there?
Yes.
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_vmas - Mark all vma's of a process as mergeable
+ * @mm:  Pointer to mm
+ */
+void ksm_add_vmas(struct mm_struct *mm)
I'd suggest calling this
I guess you forgot your name suggestion?
Yeah, I reconsidered because the first idea I had was not particularly good. Maybe
ksm_enable_for_all_vmas()
But not so sure. If you think the "add" terminology is a good fit, keep it like that.
Thanks for bearing with me :)
I briefly played with your patch to see how much it can be simplified. Always enabling ksm (setting MMF_VM_MERGEABLE) before setting MMF_VM_MERGE_ANY might simplify things. ksm_enable_merge_any() [or however it should be called] and ksm_fork() contain the interesting bits.
Feel free to incorporate what you consider valuable (uncompiled, untested).
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 5a716bdcba05..5b2eef31398e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2591,6 +2591,12 @@ int gmap_mark_unmergeable(void)
 	int ret;
 	VMA_ITERATOR(vmi, mm, 0);
 
+	/*
+	 * Make sure to disable KSM (if enabled for the whole process or
+	 * individual VMAs). Note that nothing currently hinders user space
+	 * from re-enabling it.
+	 */
+	clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 	for_each_vma(vmi, vma) {
 		/* Copy vm_flags to avoid partial modifications in ksm_madvise */
 		vm_flags = vma->vm_flags;

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 7e232ba59b86..c638b034d586 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -18,13 +18,24 @@
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
+
+void ksm_add_vma(struct vm_area_struct *vma);
+int ksm_enable_merge_any(struct mm_struct *mm);
+
 int __ksm_enter(struct mm_struct *mm);
 void __ksm_exit(struct mm_struct *mm);
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
-	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
-		return __ksm_enter(mm);
+	int ret;
+
+	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) {
+		ret = __ksm_enter(mm);
+		if (ret)
+			return ret;
+	}
+	if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
+		set_bit(MMF_VM_MERGE_ANY, &mm->flags);
 	return 0;
 }
@@ -53,6 +64,10 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
 
 #else /* !CONFIG_KSM */
 
+static inline void ksm_add_vma(struct vm_area_struct *vma)
+{
+}
+
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	return 0;

diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 0e17ae7fbfd3..0ee96ea7a0e9 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -90,4 +90,5 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
 				 MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
 
+#define MMF_VM_MERGE_ANY	29
 #endif /* _LINUX_SCHED_COREDUMP_H */

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 1312a137f7fb..759b3f53e53f 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -290,4 +290,6 @@ struct prctl_mm_map {
 #define PR_SET_VMA		0x53564d41
 # define PR_SET_VMA_ANON_NAME		0
 
+#define PR_SET_MEMORY_MERGE		67
+#define PR_GET_MEMORY_MERGE		68
 #endif /* _LINUX_PRCTL_H */

diff --git a/kernel/sys.c b/kernel/sys.c
index 495cd87d9bf4..8c2e50edeb18 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -15,6 +15,7 @@
 #include <linux/highuid.h>
 #include <linux/fs.h>
 #include <linux/kmod.h>
+#include <linux/ksm.h>
 #include <linux/perf_event.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
@@ -2661,6 +2662,30 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SET_VMA:
 		error = prctl_set_vma(arg2, arg3, arg4, arg5);
 		break;
+#ifdef CONFIG_KSM
+	case PR_SET_MEMORY_MERGE:
+		if (mmap_write_lock_killable(me->mm))
+			return -EINTR;
+
+		if (arg2) {
+			error = ksm_enable_merge_any(me->mm);
+		} else {
+			/*
+			 * TODO: we might want disable KSM on all VMAs and
+			 * trigger unsharing to completely disable KSM.
+			 */
+			clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
+			error = 0;
+		}
+		mmap_write_unlock(me->mm);
+		break;
+	case PR_GET_MEMORY_MERGE:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+
+		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
+		break;
+#endif
 	default:
 		error = -EINVAL;
 		break;

diff --git a/mm/ksm.c b/mm/ksm.c
index 2b8d30068cbb..76ceec35395c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -512,6 +512,28 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
 }
+static bool vma_ksm_compatible(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SHARED  | VM_MAYSHARE   | VM_PFNMAP  |
+			     VM_IO      | VM_DONTEXPAND | VM_HUGETLB |
+			     VM_MIXEDMAP))
+		return false;		/* just ignore the advice */
+
+	if (vma_is_dax(vma))
+		return false;
+
+#ifdef VM_SAO
+	if (vma->vm_flags & VM_SAO)
+		return false;
+#endif
+#ifdef VM_SPARC_ADI
+	if (vma->vm_flags & VM_SPARC_ADI)
+		return false;
+#endif
+
+	return true;
+}
+
 static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
 		unsigned long addr)
 {
@@ -1020,6 +1042,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 			mm_slot_free(mm_slot_cache, mm_slot);
 			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 			mmdrop(mm);
 		} else
 			spin_unlock(&ksm_mmlist_lock);
@@ -2395,6 +2418,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
 
 		mm_slot_free(mm_slot_cache, mm_slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 		mmap_read_unlock(mm);
 		mmdrop(mm);
 	} else {
@@ -2471,6 +2495,52 @@ static int ksm_scan_thread(void *nothing)
 	return 0;
 }
+static void __ksm_add_vma(struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	if (vm_flags & VM_MERGEABLE)
+		return;
+
+	if (vma_ksm_compatible(vma)) {
+		vm_flags |= VM_MERGEABLE;
+		vm_flags_reset(vma, vm_flags);
+	}
+}
+
+/**
+ * ksm_add_vma - Mark vma as mergeable
+ *
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+
+int ksm_enable_merge_any(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret;
+
+	if (test_bit(MMF_VM_MERGE_ANY, mm->flags))
+		return 0;
+
+	if (!test_bit(MMF_VM_MERGEABLE, mm->flags)) {
+		ret = __ksm_enter(mm);
+		if (ret)
+			return ret;
+	}
+	set_bit(MMF_VM_MERGE_ANY, &mm->flags);
+
+	VMA_ITERATOR(vmi, mm, 0);
+	for_each_vma(vmi, vma)
+		__ksm_add_vma(vma);
+}
+
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
 {
@@ -2479,25 +2549,10 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	switch (advice) {
 	case MADV_MERGEABLE:
-		/*
-		 * Be somewhat over-protective for now!
-		 */
-		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
-				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
-				 VM_HUGETLB | VM_MIXEDMAP))
-			return 0;		/* just ignore the advice */
-
-		if (vma_is_dax(vma))
+		if (vma->vm_flags & VM_MERGEABLE)
 			return 0;
-
-#ifdef VM_SAO
-		if (*vm_flags & VM_SAO)
-			return 0;
-#endif
-#ifdef VM_SPARC_ADI
-		if (*vm_flags & VM_SPARC_ADI)
+		if (!vma_ksm_compatible(vma))
 			return 0;
-#endif
 
 		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
 			err = __ksm_enter(mm);
@@ -2601,6 +2656,7 @@ void __ksm_exit(struct mm_struct *mm)
 	if (easy_to_free) {
 		mm_slot_free(mm_slot_cache, mm_slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 		mmdrop(mm);
 	} else if (mm_slot) {
 		mmap_write_lock(mm);

diff --git a/mm/mmap.c b/mm/mmap.c
index ff68a67a2a7c..1f8619ff58ca 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,7 @@
 #include <linux/pkeys.h>
 #include <linux/oom.h>
 #include <linux/sched/mm.h>
+#include <linux/ksm.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2659,6 +2660,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (file && vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
+	ksm_add_vma(vma);
 expanded:
 	perf_event_mmap(vma);
@@ -2931,6 +2933,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		goto mas_store_fail;
 
 	mm->map_count++;
+	ksm_add_vma(vma);
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
David Hildenbrand david@redhat.com writes:
On 12.04.23 20:41, David Hildenbrand wrote:
[...]
That will work.
work? IOW, not exporting ksm_add_mm() and not passing a flag to __ksm_enter() -- it would simply set MMF_VM_MERGEABLE ?
ksm_add_mm() is also used in prctl (kernel/sys.c). Do you want to make a similar change there?
Yes.
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_vmas - Mark all vma's of a process as mergeable
+ * @mm:  Pointer to mm
+ */
+void ksm_add_vmas(struct mm_struct *mm)
I'd suggest calling this
I guess you forgot your name suggestion?
Yeah, I reconsidered because the first idea I had was not particularly good. Maybe

ksm_enable_for_all_vmas()

But not so sure. If you think the "add" terminology is a good fit, keep it like that.

Thanks for bearing with me :)
I briefly played with your patch to see how much it can be simplified. Always enabling ksm (setting MMF_VM_MERGEABLE) before setting MMF_VM_MERGE_ANY might simplify things. ksm_enable_merge_any() [or however it should be called] and ksm_fork() contain the interesting bits.
Feel free to incorporate what you consider valuable (uncompiled, untested).
I added most of it. The only change is that I kept ksm_add_vmas as a static function, otherwise I need to define the VMA_ITERATOR at the top of the function.
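I.e., something like this (sketch of the variant I ended up with; exact placement in mm/ksm.c may differ):

static void ksm_add_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	VMA_ITERATOR(vmi, mm, 0);
	for_each_vma(vmi, vma)
		__ksm_add_vma(vma);
}

int ksm_enable_merge_any(struct mm_struct *mm)
{
	int ret;

	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
		return 0;

	if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
		ret = __ksm_enter(mm);
		if (ret)
			return ret;
	}

	set_bit(MMF_VM_MERGE_ANY, &mm->flags);
	ksm_add_vmas(mm);

	return 0;
}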
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 5a716bdcba05..5b2eef31398e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2591,6 +2591,12 @@ int gmap_mark_unmergeable(void)
 	int ret;
 	VMA_ITERATOR(vmi, mm, 0);
 
+	/*
+	 * Make sure to disable KSM (if enabled for the whole process or
+	 * individual VMAs). Note that nothing currently hinders user space
+	 * from re-enabling it.
+	 */
+	clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 	for_each_vma(vmi, vma) {
 		/* Copy vm_flags to avoid partial modifications in ksm_madvise */
 		vm_flags = vma->vm_flags;
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 7e232ba59b86..c638b034d586 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -18,13 +18,24 @@
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
+
+void ksm_add_vma(struct vm_area_struct *vma);
+int ksm_enable_merge_any(struct mm_struct *mm);
+
 int __ksm_enter(struct mm_struct *mm);
 void __ksm_exit(struct mm_struct *mm);
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
-	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
-		return __ksm_enter(mm);
+	int ret;
+
+	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) {
+		ret = __ksm_enter(mm);
+		if (ret)
+			return ret;
+	}
+	if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
+		set_bit(MMF_VM_MERGE_ANY, &mm->flags);
 	return 0;
 }
 
@@ -53,6 +64,10 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
 
 #else /* !CONFIG_KSM */
 
+static inline void ksm_add_vma(struct vm_area_struct *vma)
+{
+}
+
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	return 0;

diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 0e17ae7fbfd3..0ee96ea7a0e9 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -90,4 +90,5 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
 				 MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
 
+#define MMF_VM_MERGE_ANY	29
 #endif /* _LINUX_SCHED_COREDUMP_H */

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 1312a137f7fb..759b3f53e53f 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -290,4 +290,6 @@ struct prctl_mm_map {
 #define PR_SET_VMA		0x53564d41
 # define PR_SET_VMA_ANON_NAME		0
 
+#define PR_SET_MEMORY_MERGE		67
+#define PR_GET_MEMORY_MERGE		68
 #endif /* _LINUX_PRCTL_H */

diff --git a/kernel/sys.c b/kernel/sys.c
index 495cd87d9bf4..8c2e50edeb18 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -15,6 +15,7 @@
 #include <linux/highuid.h>
 #include <linux/fs.h>
 #include <linux/kmod.h>
+#include <linux/ksm.h>
 #include <linux/perf_event.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
@@ -2661,6 +2662,30 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SET_VMA:
 		error = prctl_set_vma(arg2, arg3, arg4, arg5);
 		break;
+#ifdef CONFIG_KSM
+	case PR_SET_MEMORY_MERGE:
+		if (mmap_write_lock_killable(me->mm))
+			return -EINTR;
+
+		if (arg2) {
+			error = ksm_enable_merge_any(me->mm);
+		} else {
+			/*
+			 * TODO: we might want disable KSM on all VMAs and
+			 * trigger unsharing to completely disable KSM.
+			 */
+			clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
+			error = 0;
+		}
+		mmap_write_unlock(me->mm);
+		break;
+	case PR_GET_MEMORY_MERGE:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+
+		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
+		break;
+#endif
 	default:
 		error = -EINVAL;
 		break;

diff --git a/mm/ksm.c b/mm/ksm.c
index 2b8d30068cbb..76ceec35395c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -512,6 +512,28 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
 }
 
+static bool vma_ksm_compatible(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SHARED  | VM_MAYSHARE   | VM_PFNMAP  |
+			     VM_IO      | VM_DONTEXPAND | VM_HUGETLB |
+			     VM_MIXEDMAP))
+		return false;		/* just ignore the advice */
+
+	if (vma_is_dax(vma))
+		return false;
+
+#ifdef VM_SAO
+	if (vma->vm_flags & VM_SAO)
+		return false;
+#endif
+#ifdef VM_SPARC_ADI
+	if (vma->vm_flags & VM_SPARC_ADI)
+		return false;
+#endif
+
+	return true;
+}
 static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
 		unsigned long addr)
 {
@@ -1020,6 +1042,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 
 			mm_slot_free(mm_slot_cache, mm_slot);
 			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 			mmdrop(mm);
 		} else
 			spin_unlock(&ksm_mmlist_lock);
@@ -2395,6 +2418,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
 
 		mm_slot_free(mm_slot_cache, mm_slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 		mmap_read_unlock(mm);
 		mmdrop(mm);
 	} else {
@@ -2471,6 +2495,52 @@ static int ksm_scan_thread(void *nothing)
 	return 0;
 }
 
+static void __ksm_add_vma(struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	if (vm_flags & VM_MERGEABLE)
+		return;
+
+	if (vma_ksm_compatible(vma)) {
+		vm_flags |= VM_MERGEABLE;
+		vm_flags_reset(vma, vm_flags);
+	}
+}
+/**
+ * ksm_add_vma - Mark vma as mergeable
+ *
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+int ksm_enable_merge_any(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret;
+
+	if (test_bit(MMF_VM_MERGE_ANY, mm->flags))
+		return 0;
+
+	if (!test_bit(MMF_VM_MERGEABLE, mm->flags)) {
+		ret = __ksm_enter(mm);
+		if (ret)
+			return ret;
+	}
+	set_bit(MMF_VM_MERGE_ANY, &mm->flags);
+
+	VMA_ITERATOR(vmi, mm, 0);
+	for_each_vma(vmi, vma)
+		__ksm_add_vma(vma);
+}
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
 {
@@ -2479,25 +2549,10 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	switch (advice) {
 	case MADV_MERGEABLE:
-		/*
-		 * Be somewhat over-protective for now!
-		 */
-		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
-				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
-				 VM_HUGETLB | VM_MIXEDMAP))
-			return 0;		/* just ignore the advice */
-
-		if (vma_is_dax(vma))
+		if (vma->vm_flags & VM_MERGEABLE)
 			return 0;
-
-#ifdef VM_SAO
-		if (*vm_flags & VM_SAO)
-			return 0;
-#endif
-#ifdef VM_SPARC_ADI
-		if (*vm_flags & VM_SPARC_ADI)
+		if (!vma_ksm_compatible(vma))
 			return 0;
-#endif
 
 		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
 			err = __ksm_enter(mm);
@@ -2601,6 +2656,7 @@ void __ksm_exit(struct mm_struct *mm)
 	if (easy_to_free) {
 		mm_slot_free(mm_slot_cache, mm_slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
 		mmdrop(mm);
 	} else if (mm_slot) {
 		mmap_write_lock(mm);
diff --git a/mm/mmap.c b/mm/mmap.c
index ff68a67a2a7c..1f8619ff58ca 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,7 @@
 #include <linux/pkeys.h>
 #include <linux/oom.h>
 #include <linux/sched/mm.h>
+#include <linux/ksm.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2659,6 +2660,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (file && vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
+	ksm_add_vma(vma);
 expanded:
 	perf_event_mmap(vma);
@@ -2931,6 +2933,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		goto mas_store_fail;
 
 	mm->map_count++;
+	ksm_add_vma(vma);
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
-- 
2.39.2
On 12.04.23 21:55, Stefan Roesch wrote:
David Hildenbrand david@redhat.com writes:
On 12.04.23 20:41, David Hildenbrand wrote:
[...]
That will work.
work? IOW, not exporting ksm_add_mm() and not passing a flag to __ksm_enter() -- it would simply set MMF_VM_MERGEABLE ?
ksm_add_mm() is also used in prctl (kernel/sys.c). Do you want to make a similar change there?
Yes.
+ * @vma:  Pointer to vma
+ */
+void ksm_add_vma(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		__ksm_add_vma(vma);
+}
+/**
+ * ksm_add_vmas - Mark all vma's of a process as mergeable
+ * @mm:  Pointer to mm
+ */
+void ksm_add_vmas(struct mm_struct *mm)
I'd suggest calling this
I guess you forgot your name suggestion?
Yeah, I reconsidered because the first idea I had was not particularly good. Maybe

ksm_enable_for_all_vmas()

But not so sure. If you think the "add" terminology is a good fit, keep it like that.

Thanks for bearing with me :)
I briefly played with your patch to see how much it can be simplified. Always enabling ksm (setting MMF_VM_MERGEABLE) before setting MMF_VM_MERGE_ANY might simplify things. ksm_enable_merge_any() [or however it should be called] and ksm_fork() contain the interesting bits.
Feel free to incorporate what you consider valuable (uncompiled, untested).
I added most of it. The only change is that I kept ksm_add_vmas as a static function, otherwise I need to define the VMA_ITERATOR at the top of the function.
Makes sense. I'll review patch #3 later, so we can hopefully get this into the 6.4 merge window after letting it rest at least some days in -next.