Look at one of the code snippets:
162         if (pgd_none(*pgd)) {
163                 unsigned long new_p4d_page = __get_free_page(gfp);
164                 if (!new_p4d_page)
165                         return NULL;
166
167                 if (pgd_none(*pgd)) {
168                         set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169                         new_p4d_page = 0;
170                 }
171                 if (new_p4d_page)
172                         free_page(new_p4d_page);
173         }
There can't be any difference between the two pgd_none(*pgd) checks at L162 and L167: once the outer check has succeeded, nothing can change *pgd before the inner check, so the inner check also succeeds and new_p4d_page is zeroed at L169. The condition at L171 is therefore always false, and the free_page() at L172 is dead code.
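For reference, with the redundant re-check and the unreachable free_page() folded away, the block reduces to the form the patch below installs (a sketch of the same code, nothing new added):

	if (pgd_none(*pgd)) {
		unsigned long new_p4d_page = __get_free_page(gfp);

		if (!new_p4d_page)
			return NULL;

		/* single-threaded path: nothing can have populated *pgd
		 * since the check above, so install the new page directly */
		set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
	}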
v2: add the commit message above.
Signed-off-by: Jike Song <albcamus@gmail.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Greg KH <gregkh@linux-foundation.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kees Cook <keescook@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jike Song <albcamus@gmail.com>
---
 arch/x86/mm/pti.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 43d4a4a29037..dc611d039bd5 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 		if (!new_p4d_page)
 			return NULL;

-		if (pgd_none(*pgd)) {
-			set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
-			new_p4d_page = 0;
-		}
-		if (new_p4d_page)
-			free_page(new_p4d_page);
+		set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
 	}
 	BUILD_BUG_ON(pgd_large(*pgd) != 0);

@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 		if (!new_pud_page)
 			return NULL;

-		if (p4d_none(*p4d)) {
-			set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
-			new_pud_page = 0;
-		}
-		if (new_pud_page)
-			free_page(new_pud_page);
+		set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
 	}

 	pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 		if (!new_pmd_page)
 			return NULL;

-		if (pud_none(*pud)) {
-			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
-			new_pmd_page = 0;
-		}
-		if (new_pmd_page)
-			free_page(new_pmd_page);
+		set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
 	}

 	return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 		if (!new_pte_page)
 			return NULL;

-		if (pmd_none(*pmd)) {
-			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
-			new_pte_page = 0;
-		}
-		if (new_pte_page)
-			free_page(new_pte_page);
+		set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
 	}

 	pte = pte_offset_kernel(pmd, address);
On Sun, Jan 07, 2018 at 06:33:17PM +0800, Jike Song wrote:
Look at one of the code snippets:
162         if (pgd_none(*pgd)) {
163                 unsigned long new_p4d_page = __get_free_page(gfp);
164                 if (!new_p4d_page)
165                         return NULL;
166
167                 if (pgd_none(*pgd)) {
168                         set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169                         new_p4d_page = 0;
170                 }
171                 if (new_p4d_page)
172                         free_page(new_p4d_page);
173         }
There can't be any difference between the two pgd_none(*pgd) checks at L162 and L167: once the outer check has succeeded, nothing can change *pgd before the inner check, so the inner check also succeeds and new_p4d_page is zeroed at L169. The condition at L171 is therefore always false, and the free_page() at L172 is dead code.
I think this is a remnant from the kaiser version which did this:
	if (pud_none(*pud)) {
		unsigned long new_pmd_page = __get_free_page(gfp);
		if (!new_pmd_page)
			return NULL;
		spin_lock(&shadow_table_allocation_lock);
		if (pud_none(*pud))
			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
		else
			free_page(new_pmd_page);
		spin_unlock(&shadow_table_allocation_lock);
	}
I was wondering too, why the duplicated checks.
Which has this explanation about the need for the locking:
/*
 * At runtime, the only things we map are some things for CPU
 * hotplug, and stacks for new processes.  No two CPUs will ever
 * be populating the same addresses, so we only need to ensure
 * that we protect between two CPUs trying to allocate and
 * populate the same page table page.
 *
 * Only take this lock when doing a set_p[4um]d(), but it is not
 * needed for doing a set_pte().  We assume that only the *owner*
 * of a given allocation will be doing this for _their_
 * allocation.
 *
 * This ensures that once a system has been running for a while
 * and there have been stacks all over and these page tables
 * are fully populated, there will be no further acquisitions of
 * this lock.
 */
static DEFINE_SPINLOCK(shadow_table_allocation_lock);
Now I have my suspicions why that's not needed anymore upstream but I'd let tglx explain better.
On Sun, 7 Jan 2018, Borislav Petkov wrote:
On Sun, Jan 07, 2018 at 06:33:17PM +0800, Jike Song wrote:
Look at one of the code snippets:
162         if (pgd_none(*pgd)) {
163                 unsigned long new_p4d_page = __get_free_page(gfp);
164                 if (!new_p4d_page)
165                         return NULL;
166
167                 if (pgd_none(*pgd)) {
168                         set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169                         new_p4d_page = 0;
170                 }
171                 if (new_p4d_page)
172                         free_page(new_p4d_page);
173         }
There can't be any difference between the two pgd_none(*pgd) checks at L162 and L167: once the outer check has succeeded, nothing can change *pgd before the inner check, so the inner check also succeeds and new_p4d_page is zeroed at L169. The condition at L171 is therefore always false, and the free_page() at L172 is dead code.
I think this is a remnant from the kaiser version which did this:
	if (pud_none(*pud)) {
		unsigned long new_pmd_page = __get_free_page(gfp);
		if (!new_pmd_page)
			return NULL;
		spin_lock(&shadow_table_allocation_lock);
		if (pud_none(*pud))
			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
		else
			free_page(new_pmd_page);
		spin_unlock(&shadow_table_allocation_lock);
	}
I was wondering too, why the duplicated checks.
Which has this explanation about the need for the locking:
/*
 * At runtime, the only things we map are some things for CPU
 * hotplug, and stacks for new processes.  No two CPUs will ever
 * be populating the same addresses, so we only need to ensure
 * that we protect between two CPUs trying to allocate and
 * populate the same page table page.
 *
 * Only take this lock when doing a set_p[4um]d(), but it is not
 * needed for doing a set_pte().  We assume that only the *owner*
 * of a given allocation will be doing this for _their_
 * allocation.
 *
 * This ensures that once a system has been running for a while
 * and there have been stacks all over and these page tables
 * are fully populated, there will be no further acquisitions of
 * this lock.
 */
static DEFINE_SPINLOCK(shadow_table_allocation_lock);
Now I have my suspicions why that's not needed anymore upstream but I'd let tglx explain better.
We got rid of all that runtime mapping stuff and the functions are only called from pti_init(). So the locking and therefore the tests above are not needed anymore. While at it we should mark all those functions __init.
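(For reference: __init places these functions in the init section that is discarded after boot, which is exactly what calling them only from pti_init() allows. Applied to the two remaining walkers, it is just the signature change that the v3 patch further down picks up; the pte walker already carries the annotation:

-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)

-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
)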
Thanks,
tglx
On 01/07/2018 04:05 AM, Thomas Gleixner wrote:
static DEFINE_SPINLOCK(shadow_table_allocation_lock);
Now I have my suspicions why that's not needed anymore upstream but I'd let tglx explain better.
We got rid of all that runtime mapping stuff and the functions are only called from pti_init(). So the locking and therefore the tests above are not needed anymore. While at it we should mark all those functions __init.
Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock, and the lock was only taken when we needed to *make* an entry, to avoid collisions.
Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check.
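A kernel-style sketch of the idiom Dave describes, purely illustrative (install_once() and alloc_lock are made-up names for this example; the real code is the KAISER snippet quoted earlier in the thread):

	/* Check / lock / re-check: cheap lock-free test first, take the
	 * lock only when an entry actually has to be made. */
	static DEFINE_SPINLOCK(alloc_lock);

	static unsigned long install_once(unsigned long *slot, gfp_t gfp)
	{
		unsigned long page;

		if (*slot)			/* lock-free fast path: already populated */
			return *slot;

		page = __get_free_page(gfp);	/* allocate before taking the lock */
		if (!page)
			return 0;

		spin_lock(&alloc_lock);
		if (!*slot)			/* re-check: another CPU may have raced us */
			*slot = page;
		else
			free_page(page);	/* lost the race, drop our copy */
		spin_unlock(&alloc_lock);

		return *slot;
	}

With only one caller at init time, the re-check can never observe anything the first check did not, which is why both the lock and the re-check go away.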
^^ Just in case someone wants to include a bit more first-hand information about wtf that code was doing there.
Look at one of the code snippets:
162         if (pgd_none(*pgd)) {
163                 unsigned long new_p4d_page = __get_free_page(gfp);
164                 if (!new_p4d_page)
165                         return NULL;
166
167                 if (pgd_none(*pgd)) {
168                         set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169                         new_p4d_page = 0;
170                 }
171                 if (new_p4d_page)
172                         free_page(new_p4d_page);
173         }
There can't be any difference between the two pgd_none(*pgd) checks at L162 and L167: once the outer check has succeeded, nothing can change *pgd before the inner check, so the inner check also succeeds and new_p4d_page is zeroed at L169. The condition at L171 is therefore always false, and the free_page() at L172 is dead code.
To quote Dave Hansen:
Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock, and the lock was only taken when we needed to *make* an entry, to avoid collisions.
Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check.
v3: mark functions __init; add first-hand explanation
v2: add commit message
Signed-off-by: Jike Song <albcamus@gmail.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Greg KH <gregkh@linux-foundation.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kees Cook <keescook@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: stable@vger.kernel.org
---
 arch/x86/mm/pti.c | 32 ++++++--------------------------
 1 file changed, 6 insertions(+), 26 deletions(-)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 43d4a4a29037..ce38f165489b 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
  *
  * Returns a pointer to a P4D on success, or NULL on failure.
  */
-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 {
 	pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 		if (!new_p4d_page)
 			return NULL;

-		if (pgd_none(*pgd)) {
-			set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
-			new_p4d_page = 0;
-		}
-		if (new_p4d_page)
-			free_page(new_p4d_page);
+		set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
 	}
 	BUILD_BUG_ON(pgd_large(*pgd) != 0);

@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
  *
 * Returns a pointer to a PMD on success, or NULL on failure.
  */
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 {
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 		if (!new_pud_page)
 			return NULL;

-		if (p4d_none(*p4d)) {
-			set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
-			new_pud_page = 0;
-		}
-		if (new_pud_page)
-			free_page(new_pud_page);
+		set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
 	}

 	pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 		if (!new_pmd_page)
 			return NULL;

-		if (pud_none(*pud)) {
-			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
-			new_pmd_page = 0;
-		}
-		if (new_pmd_page)
-			free_page(new_pmd_page);
+		set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
 	}

 	return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 		if (!new_pte_page)
 			return NULL;

-		if (pmd_none(*pmd)) {
-			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
-			new_pte_page = 0;
-		}
-		if (new_pte_page)
-			free_page(new_pte_page);
+		set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
 	}

 	pte = pte_offset_kernel(pmd, address);