* Suren Baghdasaryan <surenb@google.com> [230803 15:16]:
On Thu, Aug 3, 2023 at 11:32 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
* Suren Baghdasaryan <surenb@google.com> [230803 13:27]:
vma_prepare() is currently the central place where vmas are being locked before vma_complete() applies changes to them. While this is convenient, it also obscures vma locking and makes it harder to follow the locking rules. Move vma locking out of vma_prepare() and take vma locks explicitly at the locations where vmas are being modified. Move vma locking and replace it with an assertion inside dup_anon_vma() to further clarify the locking pattern inside vma_merge().
Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org> Suggested-by: Liam R. Howlett <Liam.Howlett@oracle.com> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
mm/mmap.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c index 850a39dee075..ae28d6f94c34 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -476,16 +476,6 @@ static inline void init_vma_prep(struct vma_prepare *vp, */ static inline void vma_prepare(struct vma_prepare *vp) {
vma_start_write(vp->vma);
if (vp->adj_next)
vma_start_write(vp->adj_next);
if (vp->insert)
vma_start_write(vp->insert);
if (vp->remove)
vma_start_write(vp->remove);
if (vp->remove2)
vma_start_write(vp->remove2);
if (vp->file) { uprobe_munmap(vp->vma, vp->vma->vm_start, vp->vma->vm_end);
@@ -618,7 +608,7 @@ static inline int dup_anon_vma(struct vm_area_struct *dst, * anon pages imported. */ if (src->anon_vma && !dst->anon_vma) {
vma_start_write(dst);
vma_assert_write_locked(dst); dst->anon_vma = src->anon_vma; return anon_vma_clone(dst, src); }
@@ -650,10 +640,12 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, bool remove_next = false; struct vma_prepare vp;
vma_start_write(vma); if (next && (vma != next) && (end == next->vm_end)) { int ret; remove_next = true;
vma_start_write(next); ret = dup_anon_vma(vma, next); if (ret) return ret;
@@ -708,6 +700,8 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, if (vma_iter_prealloc(vmi)) return -ENOMEM;
vma_start_write(vma);
init_vma_prep(&vp, vma); vma_prepare(&vp); vma_adjust_trans_huge(vma, start, end, 0);
@@ -940,16 +934,21 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, if (!merge_prev && !merge_next) return NULL; /* Not mergeable. */
if (prev)
Maybe "if (merge_prev)" instead of "if (prev)"? With the current check we write-lock prev whenever it exists, even in the cases where it won't change (cases 3 and 8, specifically). With this change, case 4 will need to lock prev separately, as it shifts prev->vm_end lower.
Ah, I see. I was trying to make sure we don't miss any locks, and ended up over-locking in cases 3 and 8. OK, I'll change the check to "if (merge_prev)" and add separate locking for case 4. I think that's what you meant?
Yes, I think that will cover all the cases. Thanks.
vma_start_write(prev);
res = vma = prev; remove = remove2 = adjust = NULL; /* Can we merge both the predecessor and the successor? */ if (merge_prev && merge_next && is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) {
vma_start_write(next); remove = next; /* case 1 */ vma_end = next->vm_end; err = dup_anon_vma(prev, next); if (curr) { /* case 6 */
vma_start_write(curr); remove = curr; remove2 = next; if (!next->anon_vma)
@@ -957,6 +956,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, } } else if (merge_prev) { /* case 2 */ if (curr) {
vma_start_write(curr); err = dup_anon_vma(prev, curr); if (end == curr->vm_end) { /* case 7 */ remove = curr;
@@ -966,6 +966,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, } } } else { /* merge_next */
vma_start_write(next); res = next; if (prev && addr < prev->vm_end) { /* case 4 */ vma_end = addr;
@@ -983,6 +984,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_pgoff = next->vm_pgoff - pglen; if (curr) { /* case 8 */ vma_pgoff = curr->vm_pgoff;
vma_start_write(curr); remove = curr; err = dup_anon_vma(next, curr); }
@@ -2373,6 +2375,9 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new);
vma_start_write(vma);
vma_start_write(new);
init_vma_prep(&vp, vma); vp.insert = new; vma_prepare(&vp);
@@ -3078,6 +3083,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, if (vma_iter_prealloc(vmi)) goto unacct_fail;
vma_start_write(vma);
init_vma_prep(&vp, vma); vma_prepare(&vp); vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
-- 2.41.0.585.gd2178a4bd4-goog