This collection of patches adds the missing overflow checks in the arch-specific gup.c variants for x86 and s390. Those were missed in the backport of 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") as mainline had a single gup.c implementation at that point. See individual patches for details.
Vlastimil
The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") was backported to 4.9.y stable as commit 2ed768cfd895. The backport however missed that in 4.9, there are several arch-specific gup.c versions with fast gup implementations, so these do not prevent refcount overflow.
This is partially fixed for x86 in stable-only commit d73af79742e7 ("x86, mm, gup: prevent get_page() race with munmap in paravirt guest"). This stable-only commit adds missing parts to x86 version, as well as s390 version, both taken from the SUSE SLES/openSUSE 4.12-based kernels.
The remaining architectures with their own gup.c are sparc, mips and sh. It's unlikely that the known overflow scenario based on FUSE, which needs 140GB of RAM, is a problem for those architectures, and I don't feel confident enough to patch them.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz> --- arch/s390/mm/gup.c | 9 ++++++--- arch/x86/mm/gup.c | 10 ++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 97fc449a7470..33a940389a6d 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_get_speculative(head))) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); @@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } @@ -150,7 +152,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index d7db45bdfb3b..551fc7fea046 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, undo_dev_pagemap(nr, nr_start, pages); return 0; } + if (unlikely(!try_get_page(page))) { + put_dev_pagemap(pgmap); + return 0; + } SetPageReferenced(page); pages[*nr] = page; - get_page(page); - put_dev_pagemap(pgmap); (*nr)++; pfn++; } while (addr += PAGE_SIZE, addr != end); @@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
refs = 0; head = pmd_page(pmd); + if (WARN_ON_ONCE(page_ref_count(head) <= 0)) + return 0; page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page); @@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
refs = 0; head = pud_page(pud); + if (WARN_ON_ONCE(page_ref_count(head) <= 0)) + return 0; page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
On Fri, 2019-11-29 at 10:03 +0100, Vlastimil Babka wrote:
The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") was backported to 4.9.y stable as commit 2ed768cfd895. The backport however missed that in 4.9, there are several arch-specific gup.c versions with fast gup implementations, so these do not prevent refcount overflow.
This is partially fixed for x86 in stable-only commit d73af79742e7 ("x86, mm, gup: prevent get_page() race with munmap in paravirt guest"). This stable-only commit adds missing parts to x86 version, as well as s390 version, both taken from the SUSE SLES/openSUSE 4.12-based kernels.
The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely the known overflow scenario based on FUSE, which needs 140GB of RAM, is a problem for those architectures, and I don't feel confident enough to patch them.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
arch/s390/mm/gup.c | 9 ++++++--- arch/x86/mm/gup.c | 10 ++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 97fc449a7470..33a940389a6d 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
No need for unlikely(); WARN_ON() includes that.
+ || !page_cache_get_speculative(head))) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head);
[...]
--- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, undo_dev_pagemap(nr, nr_start, pages); return 0; }
+ if (unlikely(!try_get_page(page))) {
+ put_dev_pagemap(pgmap);
+ return 0;
+ } SetPageReferenced(page); pages[*nr] = page;
- get_page(page);
- put_dev_pagemap(pgmap);
This leaks a pgmap reference on success!
(*nr)++; pfn++;
} while (addr += PAGE_SIZE, addr != end); @@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
Why <= 0, given we use < 0 elsewhere?
+ return 0; page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, refs = 0; head = pud_page(pud);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
Same question here.
Ben.
+ return 0; page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
On 12/3/19 1:22 PM, Ben Hutchings wrote:
+ || !page_cache_get_speculative(head))) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head);
[...]
--- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, undo_dev_pagemap(nr, nr_start, pages); return 0; }
+ if (unlikely(!try_get_page(page))) {
+ put_dev_pagemap(pgmap);
+ return 0;
+ } SetPageReferenced(page); pages[*nr] = page;
- get_page(page);
- put_dev_pagemap(pgmap);
This leaks a pgmap reference on success!
Good catch, deleted one line too many!
(*nr)++; pfn++;
} while (addr += PAGE_SIZE, addr != end); @@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
Why <= 0, given we use < 0 elsewhere?
The code uses get_head_page_multiple() which boils down to atomic_add and not add_unless_zero(), so it assumes a pre-existing pin that must not go away or it's a bug (one that I've been hunting recently in this area). The check makes it explicit.
+ return 0; page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, refs = 0; head = pud_page(pud);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
Same question here.
Same as above.
Ben.
+ return 0; page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") was backported to 4.14.y stable as commit 04198de24771. The backport however missed that in 4.14, there are several arch-specific gup.c versions with fast gup implementations, so these do not prevent refcount overflow.
This stable-only commit fixes the s390 version, and is based on the backport in SUSE SLES/openSUSE 4.12-based kernels.
The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely the known overflow scenario based on FUSE, which needs 140GB of RAM, is a problem for those architectures, and I don't feel confident enough to patch them.
Signed-off-by: Vlastimil Babka vbabka@suse.cz --- arch/s390/mm/gup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 05c8abd864f1..9bce54eac0b0 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -39,7 +39,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_get_speculative(head))) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); @@ -77,7 +78,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } @@ -151,7 +153,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; }
The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") was backported to 4.19.y stable as commit d972ebbf42ba. The backport however missed that in 4.19, there are several arch-specific gup.c versions with fast gup implementations, so these do not prevent refcount overflow.
This stable-only commit fixes the s390 version, and is based on the backport in SUSE SLES/openSUSE 4.12-based kernels.
The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely the known overflow scenario based on FUSE, which needs 140GB of RAM, is a problem for those architectures, and I don't feel confident enough to patch them.
Signed-off-by: Vlastimil Babka vbabka@suse.cz --- arch/s390/mm/gup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 2809d11c7a28..9b5b866d8adf 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -39,7 +39,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_get_speculative(head))) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); @@ -77,7 +78,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } @@ -151,7 +153,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; }
On Fri, Nov 29, 2019 at 10:03:48AM +0100, Vlastimil Babka wrote:
This collection of patches add the missing overflow checks in arch-specific gup.c variants for x86 and s390. Those were missed in backport of 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") as mainline had a single gup.c implementation at that point. See individual patches for details.
Queued up, thanks!
On 12/1/19 5:55 PM, Sasha Levin wrote:
On Fri, Nov 29, 2019 at 10:03:48AM +0100, Vlastimil Babka wrote:
This collection of patches add the missing overflow checks in arch-specific gup.c variants for x86 and s390. Those were missed in backport of 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") as mainline had a single gup.c implementation at that point. See individual patches for details.
Queued up, thanks!
Please replace the 4.9 version with the following fixed one, thanks to Ben:
----8<----
From fe7f18bd152094f8516d79e847fcb5453a6f8368 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka vbabka@suse.cz Date: Wed, 6 Nov 2019 16:32:57 +0100 Subject: [PATCH] mm, gup: add missing refcount overflow checks on x86 and s390
The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") was backported to 4.9.y stable as commit 2ed768cfd895. The backport however missed that in 4.9, there are several arch-specific gup.c versions with fast gup implementations, so these do not prevent refcount overflow.
This is partially fixed for x86 in stable-only commit d73af79742e7 ("x86, mm, gup: prevent get_page() race with munmap in paravirt guest"). This stable-only commit adds missing parts to x86 version, as well as s390 version, both taken from the SUSE SLES/openSUSE 4.12-based kernels.
The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely the known overflow scenario based on FUSE, which needs 140GB of RAM, is a problem for those architectures, and I don't feel confident enough to patch them.
Signed-off-by: Vlastimil Babka vbabka@suse.cz --- arch/s390/mm/gup.c | 9 ++++++--- arch/x86/mm/gup.c | 9 ++++++++- 2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 97fc449a7470..cf045f56581e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_get_speculative(head)) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); @@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs)) { *nr -= refs; return 0; } @@ -150,7 +152,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) { + if (WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs)) { *nr -= refs; return 0; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index d7db45bdfb3b..82f727fbbbd2 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -202,9 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, undo_dev_pagemap(nr, nr_start, pages); return 0; } + if (unlikely(!try_get_page(page))) { + put_dev_pagemap(pgmap); + return 0; + } SetPageReferenced(page); pages[*nr] = page; - get_page(page); put_dev_pagemap(pgmap); (*nr)++; pfn++; @@ -230,6 +233,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
refs = 0; head = pmd_page(pmd); + if (WARN_ON_ONCE(page_ref_count(head) <= 0)) + return 0; page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page); @@ -289,6 +294,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
refs = 0; head = pud_page(pud); + if (WARN_ON_ONCE(page_ref_count(head) <= 0)) + return 0; page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
On Tue, Dec 03, 2019 at 01:50:26PM +0100, Vlastimil Babka wrote:
On 12/1/19 5:55 PM, Sasha Levin wrote:
On Fri, Nov 29, 2019 at 10:03:48AM +0100, Vlastimil Babka wrote:
This collection of patches add the missing overflow checks in arch-specific gup.c variants for x86 and s390. Those were missed in backport of 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") as mainline had a single gup.c implementation at that point. See individual patches for details.
Queued up, thanks!
Please replace the 4.9 version with the following fixed one, thanks to Ben:
----8<----
From fe7f18bd152094f8516d79e847fcb5453a6f8368 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka vbabka@suse.cz Date: Wed, 6 Nov 2019 16:32:57 +0100 Subject: [PATCH] mm, gup: add missing refcount overflow checks on x86 and s390
The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") was backported to 4.9.y stable as commit 2ed768cfd895. The backport however missed that in 4.9, there are several arch-specific gup.c versions with fast gup implementations, so these do not prevent refcount overflow.
This is partially fixed for x86 in stable-only commit d73af79742e7 ("x86, mm, gup: prevent get_page() race with munmap in paravirt guest"). This stable-only commit adds missing parts to x86 version, as well as s390 version, both taken from the SUSE SLES/openSUSE 4.12-based kernels.
The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely the known overflow scenario based on FUSE, which needs 140GB of RAM, is a problem for those architectures, and I don't feel confident enough to patch them.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
arch/s390/mm/gup.c | 9 ++++++--- arch/x86/mm/gup.c | 9 ++++++++- 2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 97fc449a7470..cf045f56581e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_get_speculative(head)) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head);
@@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) {
+ if (WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs)) { *nr -= refs; return 0; }
@@ -150,7 +152,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) {
+ if (WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs)) { *nr -= refs; return 0; }
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index d7db45bdfb3b..82f727fbbbd2 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -202,9 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, undo_dev_pagemap(nr, nr_start, pages); return 0; }
+ if (unlikely(!try_get_page(page))) {
+ put_dev_pagemap(pgmap);
+ return 0;
+ } SetPageReferenced(page); pages[*nr] = page;
- get_page(page); put_dev_pagemap(pgmap); (*nr)++; pfn++;
@@ -230,6 +233,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+ return 0; page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -289,6 +294,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, refs = 0; head = pud_page(pud);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+ return 0; page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page);
-- 2.24.0
Now updated, sorry I missed this earlier.
greg k-h
linux-stable-mirror@lists.linaro.org