The read-only THP for filesystems would collapse THP for file opened readonly and mapped with VM_EXEC, the intended usecase is to avoid TLB miss for large text segment. But it doesn't restrict the file types so THP could be collapsed for non-regular file, for example, block device, if it is opened readonly and mapped with EXEC permission. This may cause bugs, like [1] and [2].
This is definitely not intended usecase, so just collapsing THP for regular file in order to close the attack surface.
[1] https://lore.kernel.org/lkml/CACkBjsYwLYLRmX8GpsDpMthagWOjWWrNxqY6ZLNQVr6yx+... [2] https://lore.kernel.org/linux-mm/000000000000c6a82505ce284e4c@google.com/
Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Reported-by: Hao Sun sunhao.th@gmail.com Reported-by: syzbot+aae069be1de40fb11825@syzkaller.appspotmail.com Cc: Hao Sun sunhao.th@gmail.com Cc: Matthew Wilcox willy@infradead.org Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Song Liu songliubraving@fb.com Cc: Andrea Righi andrea.righi@canonical.com Cc: stable@vger.kernel.org Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Yang Shi shy828301@gmail.com --- The patch is basically based off the proposal from Hugh (https://lore.kernel.org/linux-mm/a07564a3-b2fc-9ffe-3ace-3f276075ea5c@google...). It seems Hugh is too busy to prepare the patch for formal submission (I didn't hear from him by pinging him a couple of times on mailing list), so I prepared the patch and added his SOB.
mm/khugepaged.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 045cc579f724..e91b7271275e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
- /* Enabled via shmem mount options or sysfs settings. */ - if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) { + if (vma->vm_file) return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR); - } + + /* Enabled via shmem mount options or sysfs settings. */ + if (shmem_file(vma->vm_file)) + return shmem_huge_enabled(vma);
/* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false;
- /* Read-only file mappings need to be aligned for THP to work. */ + /* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file && - !inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) { - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); + struct inode *inode = vma->vm_file->f_inode; + + return !inode_is_open_for_write(inode) && + S_ISREG(inode->i_mode); }
if (!vma->anon_vma || vma->vm_ops)
On Oct 27, 2021, at 12:52 PM, Yang Shi shy828301@gmail.com wrote:
The read-only THP for filesystems would collapse THP for file opened readonly and mapped with VM_EXEC, the intended usecase is to avoid TLB miss for large text segment. But it doesn't restrict the file types so THP could be collapsed for non-regular file, for example, block device, if it is opened readonly and mapped with EXEC permission. This may cause bugs, like [1] and [2].
This is definitely not intended usecase, so just collapsing THP for regular file in order to close the attack surface.
[1] https://lore.kernel.org/lkml/CACkBjsYwLYLRmX8GpsDpMthagWOjWWrNxqY6ZLNQVr6yx+... [2] https://lore.kernel.org/linux-mm/000000000000c6a82505ce284e4c@google.com/
Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Reported-by: Hao Sun sunhao.th@gmail.com Reported-by: syzbot+aae069be1de40fb11825@syzkaller.appspotmail.com Cc: Hao Sun sunhao.th@gmail.com Cc: Matthew Wilcox willy@infradead.org Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Song Liu songliubraving@fb.com Cc: Andrea Righi andrea.righi@canonical.com Cc: stable@vger.kernel.org Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Yang Shi shy828301@gmail.com
The patch is basically based off the proposal from Hugh (https://lore.kernel.org/linux-mm/a07564a3-b2fc-9ffe-3ace-3f276075ea5c@google...). It seems Hugh is too busy to prepare the patch for formal submission (I didn't hear from him by pinging him a couple of times on mailing list), so I prepared the patch and added his SOB.
mm/khugepaged.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 045cc579f724..e91b7271275e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
- /* Enabled via shmem mount options or sysfs settings. */
- if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
- if (vma->vm_file) return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR);
Am I misreading this? If we return here for vma->vm_file, the following logic (shmem_file(), etc.) would be skipped, no?
Thanks, Song
- }
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file))
return shmem_huge_enabled(vma);
/* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false;
- /* Read-only file mappings need to be aligned for THP to work. */
- /* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
!inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
HPAGE_PMD_NR);
struct inode *inode = vma->vm_file->f_inode;
return !inode_is_open_for_write(inode) &&
S_ISREG(inode->i_mode);
}
if (!vma->anon_vma || vma->vm_ops)
-- 2.26.2
On Wed, Oct 27, 2021 at 1:35 PM Song Liu songliubraving@fb.com wrote:
On Oct 27, 2021, at 12:52 PM, Yang Shi shy828301@gmail.com wrote:
The read-only THP for filesystems would collapse THP for file opened readonly and mapped with VM_EXEC, the intended usecase is to avoid TLB miss for large text segment. But it doesn't restrict the file types so THP could be collapsed for non-regular file, for example, block device, if it is opened readonly and mapped with EXEC permission. This may cause bugs, like [1] and [2].
This is definitely not intended usecase, so just collapsing THP for regular file in order to close the attack surface.
[1] https://lore.kernel.org/lkml/CACkBjsYwLYLRmX8GpsDpMthagWOjWWrNxqY6ZLNQVr6yx+... [2] https://lore.kernel.org/linux-mm/000000000000c6a82505ce284e4c@google.com/
Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Reported-by: Hao Sun sunhao.th@gmail.com Reported-by: syzbot+aae069be1de40fb11825@syzkaller.appspotmail.com Cc: Hao Sun sunhao.th@gmail.com Cc: Matthew Wilcox willy@infradead.org Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Song Liu songliubraving@fb.com Cc: Andrea Righi andrea.righi@canonical.com Cc: stable@vger.kernel.org Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Yang Shi shy828301@gmail.com
The patch is basically based off the proposal from Hugh (https://lore.kernel.org/linux-mm/a07564a3-b2fc-9ffe-3ace-3f276075ea5c@google...). It seems Hugh is too busy to prepare the patch for formal submission (I didn't hear from him by pinging him a couple of times on mailing list), so I prepared the patch and added his SOB.
mm/khugepaged.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 045cc579f724..e91b7271275e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
if (vma->vm_file) return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR);
Am I misreading this? If we return here for vma->vm_file, the following logic (shmem_file(), etc.) would be skipped, no?
Oh, yes, you are right. My mistake.
Andrew,
Could you please apply the below fix?
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index e91b7271275e..26f1798c88d2 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,9 +445,9 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
- if (vma->vm_file) - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); + if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - + vma->vm_pgoff, HPAGE_PMD_NR)) + return false;
/* Enabled via shmem mount options or sysfs settings. */ if (shmem_file(vma->vm_file))
Thanks, Song
}
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file))
return shmem_huge_enabled(vma); /* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false;
/* Read-only file mappings need to be aligned for THP to work. */
/* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
!inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
HPAGE_PMD_NR);
struct inode *inode = vma->vm_file->f_inode;
return !inode_is_open_for_write(inode) &&
S_ISREG(inode->i_mode); } if (!vma->anon_vma || vma->vm_ops)
-- 2.26.2
On Wed, 27 Oct 2021 13:44:37 -0700 Yang Shi shy828301@gmail.com wrote:
--- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
if (vma->vm_file) return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR);
Am I misreading this? If we return here for vma->vm_file, the following logic (shmem_file(), etc.) would be skipped, no?
Oh, yes, you are right. My mistake.
Andrew,
Could you please apply the below fix?
um, how well tested are these changes?
On Wed, Oct 27, 2021 at 1:53 PM Andrew Morton akpm@linux-foundation.org wrote:
On Wed, 27 Oct 2021 13:44:37 -0700 Yang Shi shy828301@gmail.com wrote:
--- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
if (vma->vm_file) return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR);
Am I misreading this? If we return here for vma->vm_file, the following logic (shmem_file(), etc.) would be skipped, no?
Oh, yes, you are right. My mistake.
Andrew,
Could you please apply the below fix?
um, how well tested are these changes?
I has this fix on my test machine, but somehow forgot to fold it into the original patch. The whole fix was tested by opening /dev/nullb0 readonly and mapping with PROT_EXEC, the THP was not collapsed anymore.
On Wed, Oct 27, 2021 at 12:52:21PM -0700, Yang Shi wrote:
+++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
- /* Enabled via shmem mount options or sysfs settings. */
- if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
- if (vma->vm_file) return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR);
- }
- /* Enabled via shmem mount options or sysfs settings. */
- if (shmem_file(vma->vm_file))
return shmem_huge_enabled(vma);
This doesn't make sense to me. if vma->vm_file, we already returned, so this is dead code.
/* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false;
- /* Read-only file mappings need to be aligned for THP to work. */
- /* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
!inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
HPAGE_PMD_NR);
struct inode *inode = vma->vm_file->f_inode;
return !inode_is_open_for_write(inode) &&
}S_ISREG(inode->i_mode);
if (!vma->anon_vma || vma->vm_ops) -- 2.26.2
On Wed, Oct 27, 2021 at 1:50 PM Matthew Wilcox willy@infradead.org wrote:
On Wed, Oct 27, 2021 at 12:52:21PM -0700, Yang Shi wrote:
+++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false;
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
if (vma->vm_file) return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR);
}
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file))
return shmem_huge_enabled(vma);
This doesn't make sense to me. if vma->vm_file, we already returned, so this is dead code.
Yes, Song mentioned the same thing. Fixed by an incremental patch.
/* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false;
/* Read-only file mappings need to be aligned for THP to work. */
/* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
!inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
HPAGE_PMD_NR);
struct inode *inode = vma->vm_file->f_inode;
return !inode_is_open_for_write(inode) &&
S_ISREG(inode->i_mode); } if (!vma->anon_vma || vma->vm_ops)
-- 2.26.2
On Wed, Oct 27, 2021 at 02:32:02PM -0700, Yang Shi wrote:
This doesn't make sense to me. if vma->vm_file, we already returned, so this is dead code.
Yes, Song mentioned the same thing. Fixed by an incremental patch.
I saw that after I sent this. My email sometimes arrives slowly.
linux-stable-mirror@lists.linaro.org