For the 3.16.y stable tree: when a page is set to PAGE_NONE, its PFN is inverted. When the page is then referenced through pmd_page(), the PFN needs to be inverted again, as controlled by protnone_mask(pfn).
https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/mem/t...
The LTP test case thp03 triggers a kernel oops like the one below, and it can be reproduced every time.
BUG: unable to handle kernel paging request at ffffeafffd330000 IP: [<ffffffff8117f109>] __split_huge_page_pmd+0xc9/0x270 PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: iTCO_wdt iTCO_vendor_support x86_pkg_temp_thermal intel_powerclamp ioatdma coretemp crct10dif_pclmul crct10dif_common aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd sb_edac edac_core lpc_ich i2c_i801 dca CPU: 0 PID: 610 Comm: thp03 Not tainted 3.14.39ltsi-WR7.0.0.28_standard #1 Hardware name: Intel Corporation SandyBridge Platform/To be filled by O.E.M., BIOS CCFRCLC0.019.1308201516 08/20/2013 task: ffff8800b5a83040 ti: ffff880138b94000 task.ti: ffff880138b94000 RIP: 0010:[<ffffffff8117f109>] [<ffffffff8117f109>] __split_huge_page_pmd+0xc9/0x270 RSP: 0018:ffff880138b97d08 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8801b8bd7220 RCX: 0000000000000008 RDX: 000000fffd330000 RSI: 00007f30c8800000 RDI: 0000000000000001 RBP: ffff880138b97d48 R08: ffff880138b29980 R09: ffff880138bd7220 R10: 00000007f30c86c0 R11: ffffea0004e2f5f0 R12: 00007f30c8a00000 R13: ffffeafffd330000 R14: ffffea0000000000 R15: 00007f30c8800000 FS: 00007f30c8ec1700(0000) GS:ffff88013b600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffeafffd330000 CR3: 0000000236ca8000 CR4: 0000000000160770 Stack: ffff880138b29980 ffff880138bd7220 ffffea0004e2f5f0 00007f30c88c0000 ffff880138bd7220 00007f30c88c0000 ffff880138b289c0 0000000000000000 ffff880138b97d68 ffffffff8117fda6 ffff880237391f80 00007f30c88c0000 Call Trace: [<ffffffff8117fda6>] split_huge_page_pmd_mm+0x46/0x50 [<ffffffff8117fdda>] split_huge_page_address+0x2a/0x30 [<ffffffff8117fea9>] __vma_adjust_trans_huge+0xc9/0xf0 [<ffffffff81151d05>] vma_adjust+0x6a5/0x710 [<ffffffff81151f55>] __split_vma.isra.33+0x1e5/0x200 [<ffffffff81152d59>] split_vma+0x29/0x30 [<ffffffff81147e56>] SyS_madvise+0x6a6/0x720 [<ffffffff81a92105>] system_call_fastpath+0x26/0x2b
After applying these patches:
thp03 1 TPASS : system didn't crash, pass.
Tom Lendacky (1): x86/mm: Simplify p[g4um]d_page() macros
Toshi Kani (3): x86/asm: Add pud/pmd mask interfaces to handle large PAT bit x86/asm: Move PUD_PAGE macros to page_types.h x86/asm: Fix pud/pmd interfaces to handle large PAT bit
arch/x86/include/asm/page_64_types.h | 3 --- arch/x86/include/asm/page_types.h | 3 +++ arch/x86/include/asm/pgtable.h | 19 ++++++++++------- arch/x86/include/asm/pgtable_types.h | 40 ++++++++++++++++++++++++++++++++---- 4 files changed, 51 insertions(+), 14 deletions(-)
From: Toshi Kani toshi.kani@hpe.com
commit 4be4c1fb9a754b100466ebaec50f825be0b2050b upstream.
The PAT bit gets relocated to bit 12 when PUD and PMD mappings are used. This bit 12, however, is not covered by PTE_FLAGS_MASK, which is used for masking pfn and flags for all levels.
Add pud/pmd mask interfaces to handle pfn and flags properly by using P?D_PAGE_MASK when PUD/PMD mappings are used, i.e. PSE bit is set.
Suggested-by: Juergen Gross jgross@suse.com Signed-off-by: Toshi Kani toshi.kani@hpe.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Juergen Gross jgross@suse.com Cc: H. Peter Anvin hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Borislav Petkov bp@alien8.de Cc: Konrad Wilk konrad.wilk@oracle.com Cc: Robert Elliot elliott@hpe.com Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-4-git-send-email-toshi.kani@hpe.co... Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Wenkuan Wang Wenkuan.Wang@windriver.com --- arch/x86/include/asm/pgtable_types.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 32f1f8c..f2939cc 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -257,10 +257,10 @@
#include <linux/types.h>
-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */ #define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */ #define PTE_FLAGS_MASK (~PTE_PFN_MASK)
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; @@ -329,11 +329,43 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) } #endif
+static inline pudval_t pud_pfn_mask(pud_t pud) +{ + if (native_pud_val(pud) & _PAGE_PSE) + return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK; + else + return PTE_PFN_MASK; +} + +static inline pudval_t pud_flags_mask(pud_t pud) +{ + if (native_pud_val(pud) & _PAGE_PSE) + return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK); + else + return ~PTE_PFN_MASK; +} + static inline pudval_t pud_flags(pud_t pud) { return native_pud_val(pud) & PTE_FLAGS_MASK; }
+static inline pmdval_t pmd_pfn_mask(pmd_t pmd) +{ + if (native_pmd_val(pmd) & _PAGE_PSE) + return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK; + else + return PTE_PFN_MASK; +} + +static inline pmdval_t pmd_flags_mask(pmd_t pmd) +{ + if (native_pmd_val(pmd) & _PAGE_PSE) + return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK); + else + return ~PTE_PFN_MASK; +} + static inline pmdval_t pmd_flags(pmd_t pmd) { return native_pmd_val(pmd) & PTE_FLAGS_MASK;
From: Toshi Kani toshi.kani@hpe.com
commit 832102671855f73962e7a04fdafd48b9385ea5c6 upstream.
PUD_SHIFT is defined according to a given kernel configuration, which allows it to be commonly used by any x86 kernel. However, PUD_PAGE_SIZE and PUD_PAGE_MASK, which are set from PUD_SHIFT, are defined in page_64_types.h, which can be used by 64-bit kernels only.
Move PUD_PAGE_SIZE and PUD_PAGE_MASK to page_types.h so that they can be used by any x86 kernels as well.
Signed-off-by: Toshi Kani toshi.kani@hpe.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Juergen Gross jgross@suse.com Cc: H. Peter Anvin hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Borislav Petkov bp@alien8.de Cc: Konrad Wilk konrad.wilk@oracle.com Cc: Robert Elliot elliott@hpe.com Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-3-git-send-email-toshi.kani@hpe.co... Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Wenkuan Wang Wenkuan.Wang@windriver.com --- arch/x86/include/asm/page_64_types.h | 3 --- arch/x86/include/asm/page_types.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 75450b2..c708f8f 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -20,9 +20,6 @@ #define MCE_STACK 4 #define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - /* * Set __PAGE_OFFSET to the most negative possible address + * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3..f3559c7 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -20,6 +20,9 @@ #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) + #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1))
From: Toshi Kani toshi.kani@hpe.com
commit f70abb0fc3da1b2945c92751ccda2744081bf2b7 upstream.
We now have pud/pmd mask interfaces, which handle the pfn & flags masks properly for the large PAT bit.
Fix pud/pmd pfn & flags interfaces by replacing PTE_PFN_MASK and PTE_FLAGS_MASK with the pud/pmd mask interfaces.
Suggested-by: Juergen Gross jgross@suse.com Signed-off-by: Toshi Kani toshi.kani@hpe.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Juergen Gross jgross@suse.com Cc: H. Peter Anvin hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Borislav Petkov bp@alien8.de Cc: Konrad Wilk konrad.wilk@oracle.com Cc: Robert Elliot elliott@hpe.com Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-5-git-send-email-toshi.kani@hpe.co... Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Wenkuan Wang Wenkuan.Wang@windriver.com --- arch/x86/include/asm/pgtable.h | 14 ++++++++------ arch/x86/include/asm/pgtable_types.h | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5a10214..8a455d9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -156,14 +156,14 @@ static inline unsigned long pmd_pfn(pmd_t pmd) { phys_addr_t pfn = pmd_val(pmd); pfn ^= protnone_mask(pfn); - return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; + return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; }
static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); pfn ^= protnone_mask(pfn); - return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; + return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; }
#define pte_page(pte) pfn_to_page(pte_pfn(pte)) @@ -584,14 +584,15 @@ static inline int pmd_none(pmd_t pmd)
static inline unsigned long pmd_page_vaddr(pmd_t pmd) { - return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); + return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd)); }
/* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT) +#define pmd_page(pmd) \ + pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
/* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] @@ -657,14 +658,15 @@ static inline int pud_present(pud_t pud)
static inline unsigned long pud_page_vaddr(pud_t pud) { - return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); + return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); }
/* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) +#define pud_page(pud) \ + pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
/* Find an entry in the second-level page table.. */ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f2939cc..a5d25ab 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -347,7 +347,7 @@ static inline pudval_t pud_flags_mask(pud_t pud)
static inline pudval_t pud_flags(pud_t pud) { - return native_pud_val(pud) & PTE_FLAGS_MASK; + return native_pud_val(pud) & pud_flags_mask(pud); }
static inline pmdval_t pmd_pfn_mask(pmd_t pmd) @@ -368,7 +368,7 @@ static inline pmdval_t pmd_flags_mask(pmd_t pmd)
static inline pmdval_t pmd_flags(pmd_t pmd) { - return native_pmd_val(pmd) & PTE_FLAGS_MASK; + return native_pmd_val(pmd) & pmd_flags_mask(pmd); }
static inline pte_t native_make_pte(pteval_t val)
On Tue, 2018-12-25 at 18:44 +0800, Wenkuan Wang wrote:
From: Toshi Kani toshi.kani@hpe.com
commit f70abb0fc3da1b2945c92751ccda2744081bf2b7 upstream.
Now that we have pud/pmd mask interfaces, which handle pfn & flags mask properly for the large PAT bit.
Fix pud/pmd pfn & flags interfaces by replacing PTE_PFN_MASK and PTE_FLAGS_MASK with the pud/pmd mask interfaces.
[...]
This causes its own regression, so I also queued up:
commit 70f1528747651b20c7769d3516ade369f9963237 Author: Kirill A. Shutemov kirill.shutemov@linux.intel.com Date: Mon Nov 30 11:10:33 2015 +0100
x86/mm: Fix regression with huge pages on PAE
Ben.
On 1/3/19 00:00, Ben Hutchings wrote:
On Tue, 2018-12-25 at 18:44 +0800, Wenkuan Wang wrote:
From: Toshi Kani toshi.kani@hpe.com
commit f70abb0fc3da1b2945c92751ccda2744081bf2b7 upstream.
Now that we have pud/pmd mask interfaces, which handle pfn & flags mask properly for the large PAT bit.
Fix pud/pmd pfn & flags interfaces by replacing PTE_PFN_MASK and PTE_FLAGS_MASK with the pud/pmd mask interfaces.
[...]
This causes its own regression, so I also queued up:
Thanks for the reminder. Of course it should be added, and sorry for missing it. Patch 4/4 fixes the problem by using pmd_pfn() to undo the inversion; the other three patches are dependencies of 4/4, needed to address compilation issues.
Regards Wenkuan
commit 70f1528747651b20c7769d3516ade369f9963237 Author: Kirill A. Shutemov kirill.shutemov@linux.intel.com Date: Mon Nov 30 11:10:33 2015 +0100
x86/mm: Fix regression with huge pages on PAE
Ben.
From: Tom Lendacky thomas.lendacky@amd.com
commit fd7e315988b784509ba3f1b42f539bd0b1fca9bb upstream.
Create a pgd_pfn() macro similar to the p[4um]d_pfn() macros and then use the p[g4um]d_pfn() macros in the p[g4um]d_page() macros instead of duplicating the code.
Signed-off-by: Tom Lendacky thomas.lendacky@amd.com Reviewed-by: Thomas Gleixner tglx@linutronix.de Reviewed-by: Borislav Petkov bp@suse.de Cc: Alexander Potapenko glider@google.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Andy Lutomirski luto@kernel.org Cc: Arnd Bergmann arnd@arndb.de Cc: Borislav Petkov bp@alien8.de Cc: Brijesh Singh brijesh.singh@amd.com Cc: Dave Young dyoung@redhat.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Jonathan Corbet corbet@lwn.net Cc: Konrad Rzeszutek Wilk konrad.wilk@oracle.com Cc: Larry Woodman lwoodman@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Matt Fleming matt@codeblueprint.co.uk Cc: Michael S. Tsirkin mst@redhat.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Peter Zijlstra peterz@infradead.org Cc: Radim Krčmář rkrcmar@redhat.com Cc: Rik van Riel riel@redhat.com Cc: Toshimitsu Kani toshi.kani@hpe.com Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/e61eb533a6d0aac941db2723d8aa63ef6b882dee.1500319216... Signed-off-by: Ingo Molnar mingo@kernel.org [Backported to 4.9 stable by AK, suggested by Michael Hocko] Signed-off-by: Andi Kleen ak@linux.intel.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Wenkuan Wang Wenkuan.Wang@windriver.com --- arch/x86/include/asm/pgtable.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8a455d9..f8dad5a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -166,6 +166,11 @@ static inline unsigned long pud_pfn(pud_t pud) return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; }
+static inline unsigned long pgd_pfn(pgd_t pgd) +{ + return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte) @@ -591,8 +596,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pmd_page(pmd) \ - pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] @@ -665,8 +669,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pud_page(pud) \ - pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT) +#define pud_page(pud) pfn_to_page(pud_pfn(pud))
/* Find an entry in the second-level page table.. */ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) @@ -706,7 +709,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) +#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
/* to find an entry in a page-table-directory. */ static inline unsigned long pud_index(unsigned long address)
On Tue, 2018-12-25 at 18:44 +0800, Wenkuan Wang wrote:
For the 3.16.y stable tree: when a page is set to PAGE_NONE, its PFN is inverted. When the page is then referenced through pmd_page(), the PFN needs to be inverted again, as controlled by protnone_mask(pfn).
https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/mem/t...
The LTP test case thp03 triggers a kernel oops like the one below, and it can be reproduced every time.
[...]
After applying these patches:
thp03 1 TPASS : system didn't crash, pass.
[...]
Thank you very much for testing and tracking this down. I've queued up these patches for the next update.
Ben.
linux-stable-mirror@lists.linaro.org