The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 169113ece0f29ebe884a6cfcf57c1ace04d8a36a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon(a)arm.com>
Date: Thu, 3 Jan 2019 17:45:07 +0000
Subject: [PATCH] arm64: compat: Avoid sending SIGILL for unallocated syscall
numbers
The ARM Linux kernel handles the EABI syscall numbers as follows:
0 - NR_SYSCALLS-1 : Invoke syscall via syscall table
NR_SYSCALLS - 0xeffff : -ENOSYS (to be allocated in future)
0xf0000 - 0xf07ff : Private syscall or -ENOSYS if not allocated
> 0xf07ff : SIGILL
Our compat code gets this wrong and ends up sending SIGILL in response
to all syscalls greater than NR_SYSCALLS which have a value greater
than 0x7ff in the bottom 16 bits.
Fix this by defining the end of the ARM private syscall region and
checking the syscall number against that directly. Update the comment
while we're at it.
Cc: <stable(a)vger.kernel.org>
Cc: Dave Martin <Dave.Martin(a)arm.com>
Reported-by: Pi-Hsun Shih <pihsun(a)chromium.org>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index b13ca091f833..85d5c1026204 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -40,8 +40,9 @@
* The following SVCs are ARM private.
*/
#define __ARM_NR_COMPAT_BASE 0x0f0000
-#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
-#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
+#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE + 2)
+#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
+#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
#define __NR_compat_syscalls 399
#endif
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 32653d156747..a79db4e485a6 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -102,12 +102,12 @@ long compat_arm_syscall(struct pt_regs *regs)
default:
/*
- * Calls 9f00xx..9f07ff are defined to return -ENOSYS
+ * Calls 0xf0xxx..0xf07ff are defined to return -ENOSYS
* if not implemented, rather than raising SIGILL. This
* way the calling program can gracefully determine whether
* a feature is supported.
*/
- if ((no & 0xffff) <= 0x7ff)
+ if (no < __ARM_NR_COMPAT_END)
return -ENOSYS;
break;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 169113ece0f29ebe884a6cfcf57c1ace04d8a36a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon(a)arm.com>
Date: Thu, 3 Jan 2019 17:45:07 +0000
Subject: [PATCH] arm64: compat: Avoid sending SIGILL for unallocated syscall
numbers
The ARM Linux kernel handles the EABI syscall numbers as follows:
0 - NR_SYSCALLS-1 : Invoke syscall via syscall table
NR_SYSCALLS - 0xeffff : -ENOSYS (to be allocated in future)
0xf0000 - 0xf07ff : Private syscall or -ENOSYS if not allocated
> 0xf07ff : SIGILL
Our compat code gets this wrong and ends up sending SIGILL in response
to all syscalls greater than NR_SYSCALLS which have a value greater
than 0x7ff in the bottom 16 bits.
Fix this by defining the end of the ARM private syscall region and
checking the syscall number against that directly. Update the comment
while we're at it.
Cc: <stable(a)vger.kernel.org>
Cc: Dave Martin <Dave.Martin(a)arm.com>
Reported-by: Pi-Hsun Shih <pihsun(a)chromium.org>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index b13ca091f833..85d5c1026204 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -40,8 +40,9 @@
* The following SVCs are ARM private.
*/
#define __ARM_NR_COMPAT_BASE 0x0f0000
-#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
-#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
+#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE + 2)
+#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
+#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
#define __NR_compat_syscalls 399
#endif
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 32653d156747..a79db4e485a6 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -102,12 +102,12 @@ long compat_arm_syscall(struct pt_regs *regs)
default:
/*
- * Calls 9f00xx..9f07ff are defined to return -ENOSYS
+ * Calls 0xf0xxx..0xf07ff are defined to return -ENOSYS
* if not implemented, rather than raising SIGILL. This
* way the calling program can gracefully determine whether
* a feature is supported.
*/
- if ((no & 0xffff) <= 0x7ff)
+ if (no < __ARM_NR_COMPAT_END)
return -ENOSYS;
break;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From fb544d1ca65a89f7a3895f7531221ceeed74ada7 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall(a)arm.com>
Date: Tue, 11 Dec 2018 13:23:57 +0100
Subject: [PATCH] KVM: arm/arm64: Fix VMID alloc race by reverting to lock-less
We recently addressed a VMID generation race by introducing a read/write
lock around accesses and updates to the vmid generation values.
However, kvm_arch_vcpu_ioctl_run() also calls need_new_vmid_gen() but
does so without taking the read lock.
As far as I can tell, this can lead to the same kind of race:
VM 0, VCPU 0 VM 0, VCPU 1
------------ ------------
update_vttbr (vmid 254)
update_vttbr (vmid 1) // roll over
read_lock(kvm_vmid_lock);
force_vm_exit()
local_irq_disable
need_new_vmid_gen == false //because vmid gen matches
enter_guest (vmid 254)
kvm_arch.vttbr = <PGD>:<VMID 1>
read_unlock(kvm_vmid_lock);
enter_guest (vmid 1)
Which results in running two VCPUs in the same VM with different VMIDs
and (even worse) other VCPUs from other VMs could now allocate clashing
VMID 254 from the new generation as long as VCPU 0 is not exiting.
Attempt to solve this by making sure vttbr is updated before another CPU
can observe the updated VMID generation.
Cc: stable(a)vger.kernel.org
Fixes: f0cf47d939d0 "KVM: arm/arm64: Close VMID generation race"
Reviewed-by: Julien Thierry <julien.thierry(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 4adcee5fc126..d9273f972828 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -66,7 +66,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
static unsigned int kvm_vmid_bits __read_mostly;
-static DEFINE_RWLOCK(kvm_vmid_lock);
+static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
@@ -484,7 +484,9 @@ void force_vm_exit(const cpumask_t *mask)
*/
static bool need_new_vmid_gen(struct kvm *kvm)
{
- return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+ u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
+ smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
+ return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
}
/**
@@ -499,16 +501,11 @@ static void update_vttbr(struct kvm *kvm)
{
phys_addr_t pgd_phys;
u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0;
- bool new_gen;
- read_lock(&kvm_vmid_lock);
- new_gen = need_new_vmid_gen(kvm);
- read_unlock(&kvm_vmid_lock);
-
- if (!new_gen)
+ if (!need_new_vmid_gen(kvm))
return;
- write_lock(&kvm_vmid_lock);
+ spin_lock(&kvm_vmid_lock);
/*
* We need to re-check the vmid_gen here to ensure that if another vcpu
@@ -516,7 +513,7 @@ static void update_vttbr(struct kvm *kvm)
* use the same vmid.
*/
if (!need_new_vmid_gen(kvm)) {
- write_unlock(&kvm_vmid_lock);
+ spin_unlock(&kvm_vmid_lock);
return;
}
@@ -539,7 +536,6 @@ static void update_vttbr(struct kvm *kvm)
kvm_call_hyp(__kvm_flush_vm_context);
}
- kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
kvm->arch.vmid = kvm_next_vmid;
kvm_next_vmid++;
kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
@@ -550,7 +546,10 @@ static void update_vttbr(struct kvm *kvm)
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
- write_unlock(&kvm_vmid_lock);
+ smp_wmb();
+ WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen));
+
+ spin_unlock(&kvm_vmid_lock);
}
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From fb544d1ca65a89f7a3895f7531221ceeed74ada7 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall(a)arm.com>
Date: Tue, 11 Dec 2018 13:23:57 +0100
Subject: [PATCH] KVM: arm/arm64: Fix VMID alloc race by reverting to lock-less
We recently addressed a VMID generation race by introducing a read/write
lock around accesses and updates to the vmid generation values.
However, kvm_arch_vcpu_ioctl_run() also calls need_new_vmid_gen() but
does so without taking the read lock.
As far as I can tell, this can lead to the same kind of race:
VM 0, VCPU 0 VM 0, VCPU 1
------------ ------------
update_vttbr (vmid 254)
update_vttbr (vmid 1) // roll over
read_lock(kvm_vmid_lock);
force_vm_exit()
local_irq_disable
need_new_vmid_gen == false //because vmid gen matches
enter_guest (vmid 254)
kvm_arch.vttbr = <PGD>:<VMID 1>
read_unlock(kvm_vmid_lock);
enter_guest (vmid 1)
Which results in running two VCPUs in the same VM with different VMIDs
and (even worse) other VCPUs from other VMs could now allocate clashing
VMID 254 from the new generation as long as VCPU 0 is not exiting.
Attempt to solve this by making sure vttbr is updated before another CPU
can observe the updated VMID generation.
Cc: stable(a)vger.kernel.org
Fixes: f0cf47d939d0 "KVM: arm/arm64: Close VMID generation race"
Reviewed-by: Julien Thierry <julien.thierry(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 4adcee5fc126..d9273f972828 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -66,7 +66,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
static unsigned int kvm_vmid_bits __read_mostly;
-static DEFINE_RWLOCK(kvm_vmid_lock);
+static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
@@ -484,7 +484,9 @@ void force_vm_exit(const cpumask_t *mask)
*/
static bool need_new_vmid_gen(struct kvm *kvm)
{
- return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+ u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
+ smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
+ return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
}
/**
@@ -499,16 +501,11 @@ static void update_vttbr(struct kvm *kvm)
{
phys_addr_t pgd_phys;
u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0;
- bool new_gen;
- read_lock(&kvm_vmid_lock);
- new_gen = need_new_vmid_gen(kvm);
- read_unlock(&kvm_vmid_lock);
-
- if (!new_gen)
+ if (!need_new_vmid_gen(kvm))
return;
- write_lock(&kvm_vmid_lock);
+ spin_lock(&kvm_vmid_lock);
/*
* We need to re-check the vmid_gen here to ensure that if another vcpu
@@ -516,7 +513,7 @@ static void update_vttbr(struct kvm *kvm)
* use the same vmid.
*/
if (!need_new_vmid_gen(kvm)) {
- write_unlock(&kvm_vmid_lock);
+ spin_unlock(&kvm_vmid_lock);
return;
}
@@ -539,7 +536,6 @@ static void update_vttbr(struct kvm *kvm)
kvm_call_hyp(__kvm_flush_vm_context);
}
- kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
kvm->arch.vmid = kvm_next_vmid;
kvm_next_vmid++;
kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
@@ -550,7 +546,10 @@ static void update_vttbr(struct kvm *kvm)
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
- write_unlock(&kvm_vmid_lock);
+ smp_wmb();
+ WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen));
+
+ spin_unlock(&kvm_vmid_lock);
}
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
The following commit fixes freezes in virtio device drivers when KVM
is nested under
VMWare Workstation/ESXi or Hyper-V. I've encountered problems running KVM
inside VMWare since upgrading to Debian 9 (currently testing 4.9.88-1+deb9u1).
d391f1207067268261add0485f0f34503539c5b0
The same issue affects 4.4.y as well. A git-bisect within my
environment stopped at
e9ea5069d9e569c32ab913c39467df32e056b3a7, where the KVM capability was added
that QEMU checks before enabling fast mmio.
Thanks,
Mike
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ff4dd232ec45a0e45ea69f28f069f2ab22b4908a Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton(a)mips.com>
Date: Tue, 4 Dec 2018 23:44:12 +0000
Subject: [PATCH] MIPS: Expand MIPS32 ASIDs to 64 bits
ASIDs have always been stored as unsigned longs, ie. 32 bits on MIPS32
kernels. This is problematic because it is feasible for the ASID version
to overflow & wrap around to zero.
We currently attempt to handle this overflow by simply setting the ASID
version to 1, using asid_first_version(), but we make no attempt to
account for the fact that there may be mm_structs with stale ASIDs that
have versions which we now reuse due to the overflow & wrap around.
Encountering this requires that:
1) A struct mm_struct X is active on CPU A using ASID (V,n).
2) That mm is not used on CPU A for the length of time that it takes
for CPU A's asid_cache to overflow & wrap around to the same
version V that the mm had in step 1. During this time tasks using
the mm could either be sleeping or only scheduled on other CPUs.
3) Some other mm Y becomes active on CPU A and is allocated the same
ASID (V,n).
4) mm X now becomes active on CPU A again, and now incorrectly has the
same ASID as mm Y.
Where struct mm_struct ASIDs are represented above in the format
(version, EntryHi.ASID), and on a typical MIPS32 system version will be
24 bits wide & EntryHi.ASID will be 8 bits wide.
The length of time required in step 2 is highly dependent upon the CPU &
workload, but for a hypothetical 2GHz CPU running a workload which
generates a new ASID every 10000 cycles this period is around 248 days.
Due to this long period of time & the fact that tasks need to be
scheduled in just the right (or wrong, depending upon your inclination)
way, this is obviously a difficult bug to encounter but it's entirely
possible as evidenced by reports.
In order to fix this, simply extend ASIDs to 64 bits even on MIPS32
builds. This will extend the period of time required for the
hypothetical system above to encounter the problem from 28 days to
around 3 trillion years, which feels safely outside of the realms of
possibility.
The cost of this is slightly more generated code in some commonly
executed paths, but this is pretty minimal:
| Code Size Gain | Percentage
-----------------------|----------------|-------------
decstation_defconfig | +270 | +0.00%
32r2el_defconfig | +652 | +0.01%
32r6el_defconfig | +1000 | +0.01%
I have been unable to measure any change in performance of the LMbench
lat_ctx or lat_proc tests resulting from the 64b ASIDs on either
32r2el_defconfig+interAptiv or 32r6el_defconfig+I6500 systems.
Signed-off-by: Paul Burton <paul.burton(a)mips.com>
Suggested-by: James Hogan <jhogan(a)kernel.org>
References: https://lore.kernel.org/linux-mips/80B78A8B8FEE6145A87579E8435D78C30205D5F3…
References: https://lore.kernel.org/linux-mips/1488684260-18867-1-git-send-email-jiwei.…
Cc: Jiwei Sun <jiwei.sun(a)windriver.com>
Cc: Yu Huabing <yhb(a)ruijie.com.cn>
Cc: stable(a)vger.kernel.org # 2.6.12+
Cc: linux-mips(a)vger.kernel.org
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index a41059d47d31..ed7ffe4e63a3 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -50,7 +50,7 @@ struct guest_info {
#define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */
struct cpuinfo_mips {
- unsigned long asid_cache;
+ u64 asid_cache;
#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
unsigned long asid_mask;
#endif
diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index 0740be7d5d4a..24d6b42345fb 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -7,7 +7,7 @@
#include <linux/wait.h>
typedef struct {
- unsigned long asid[NR_CPUS];
+ u64 asid[NR_CPUS];
void *vdso;
atomic_t fp_mode_switching;
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 94414561de0e..a589585be21b 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -76,14 +76,14 @@ extern unsigned long pgd_current[];
* All unused by hardware upper bits will be considered
* as a software asid extension.
*/
-static unsigned long asid_version_mask(unsigned int cpu)
+static inline u64 asid_version_mask(unsigned int cpu)
{
unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
- return ~(asid_mask | (asid_mask - 1));
+ return ~(u64)(asid_mask | (asid_mask - 1));
}
-static unsigned long asid_first_version(unsigned int cpu)
+static inline u64 asid_first_version(unsigned int cpu)
{
return ~asid_version_mask(cpu) + 1;
}
@@ -102,14 +102,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void
get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
{
- unsigned long asid = asid_cache(cpu);
+ u64 asid = asid_cache(cpu);
if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
if (cpu_has_vtag_icache)
flush_icache_all();
local_flush_tlb_all(); /* start new asid cycle */
- if (!asid) /* fix version if needed */
- asid = asid_first_version(cpu);
}
cpu_context(cpu, mm) = asid_cache(cpu) = asid;
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 3466fcdae0ca..01848cdf2074 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -245,7 +245,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma,
pmd_t *pmdp;
pte_t *ptep;
- pr_debug("cpage[%08lx,%08lx]\n",
+ pr_debug("cpage[%08llx,%08lx]\n",
cpu_context(smp_processor_id(), mm), addr);
/* No ASID => no such page in the cache. */
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ff4dd232ec45a0e45ea69f28f069f2ab22b4908a Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton(a)mips.com>
Date: Tue, 4 Dec 2018 23:44:12 +0000
Subject: [PATCH] MIPS: Expand MIPS32 ASIDs to 64 bits
ASIDs have always been stored as unsigned longs, ie. 32 bits on MIPS32
kernels. This is problematic because it is feasible for the ASID version
to overflow & wrap around to zero.
We currently attempt to handle this overflow by simply setting the ASID
version to 1, using asid_first_version(), but we make no attempt to
account for the fact that there may be mm_structs with stale ASIDs that
have versions which we now reuse due to the overflow & wrap around.
Encountering this requires that:
1) A struct mm_struct X is active on CPU A using ASID (V,n).
2) That mm is not used on CPU A for the length of time that it takes
for CPU A's asid_cache to overflow & wrap around to the same
version V that the mm had in step 1. During this time tasks using
the mm could either be sleeping or only scheduled on other CPUs.
3) Some other mm Y becomes active on CPU A and is allocated the same
ASID (V,n).
4) mm X now becomes active on CPU A again, and now incorrectly has the
same ASID as mm Y.
Where struct mm_struct ASIDs are represented above in the format
(version, EntryHi.ASID), and on a typical MIPS32 system version will be
24 bits wide & EntryHi.ASID will be 8 bits wide.
The length of time required in step 2 is highly dependent upon the CPU &
workload, but for a hypothetical 2GHz CPU running a workload which
generates a new ASID every 10000 cycles this period is around 248 days.
Due to this long period of time & the fact that tasks need to be
scheduled in just the right (or wrong, depending upon your inclination)
way, this is obviously a difficult bug to encounter but it's entirely
possible as evidenced by reports.
In order to fix this, simply extend ASIDs to 64 bits even on MIPS32
builds. This will extend the period of time required for the
hypothetical system above to encounter the problem from 28 days to
around 3 trillion years, which feels safely outside of the realms of
possibility.
The cost of this is slightly more generated code in some commonly
executed paths, but this is pretty minimal:
| Code Size Gain | Percentage
-----------------------|----------------|-------------
decstation_defconfig | +270 | +0.00%
32r2el_defconfig | +652 | +0.01%
32r6el_defconfig | +1000 | +0.01%
I have been unable to measure any change in performance of the LMbench
lat_ctx or lat_proc tests resulting from the 64b ASIDs on either
32r2el_defconfig+interAptiv or 32r6el_defconfig+I6500 systems.
Signed-off-by: Paul Burton <paul.burton(a)mips.com>
Suggested-by: James Hogan <jhogan(a)kernel.org>
References: https://lore.kernel.org/linux-mips/80B78A8B8FEE6145A87579E8435D78C30205D5F3…
References: https://lore.kernel.org/linux-mips/1488684260-18867-1-git-send-email-jiwei.…
Cc: Jiwei Sun <jiwei.sun(a)windriver.com>
Cc: Yu Huabing <yhb(a)ruijie.com.cn>
Cc: stable(a)vger.kernel.org # 2.6.12+
Cc: linux-mips(a)vger.kernel.org
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index a41059d47d31..ed7ffe4e63a3 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -50,7 +50,7 @@ struct guest_info {
#define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */
struct cpuinfo_mips {
- unsigned long asid_cache;
+ u64 asid_cache;
#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
unsigned long asid_mask;
#endif
diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index 0740be7d5d4a..24d6b42345fb 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -7,7 +7,7 @@
#include <linux/wait.h>
typedef struct {
- unsigned long asid[NR_CPUS];
+ u64 asid[NR_CPUS];
void *vdso;
atomic_t fp_mode_switching;
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 94414561de0e..a589585be21b 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -76,14 +76,14 @@ extern unsigned long pgd_current[];
* All unused by hardware upper bits will be considered
* as a software asid extension.
*/
-static unsigned long asid_version_mask(unsigned int cpu)
+static inline u64 asid_version_mask(unsigned int cpu)
{
unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
- return ~(asid_mask | (asid_mask - 1));
+ return ~(u64)(asid_mask | (asid_mask - 1));
}
-static unsigned long asid_first_version(unsigned int cpu)
+static inline u64 asid_first_version(unsigned int cpu)
{
return ~asid_version_mask(cpu) + 1;
}
@@ -102,14 +102,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void
get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
{
- unsigned long asid = asid_cache(cpu);
+ u64 asid = asid_cache(cpu);
if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
if (cpu_has_vtag_icache)
flush_icache_all();
local_flush_tlb_all(); /* start new asid cycle */
- if (!asid) /* fix version if needed */
- asid = asid_first_version(cpu);
}
cpu_context(cpu, mm) = asid_cache(cpu) = asid;
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 3466fcdae0ca..01848cdf2074 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -245,7 +245,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma,
pmd_t *pmdp;
pte_t *ptep;
- pr_debug("cpage[%08lx,%08lx]\n",
+ pr_debug("cpage[%08llx,%08lx]\n",
cpu_context(smp_processor_id(), mm), addr);
/* No ASID => no such page in the cache. */
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ff4dd232ec45a0e45ea69f28f069f2ab22b4908a Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton(a)mips.com>
Date: Tue, 4 Dec 2018 23:44:12 +0000
Subject: [PATCH] MIPS: Expand MIPS32 ASIDs to 64 bits
ASIDs have always been stored as unsigned longs, ie. 32 bits on MIPS32
kernels. This is problematic because it is feasible for the ASID version
to overflow & wrap around to zero.
We currently attempt to handle this overflow by simply setting the ASID
version to 1, using asid_first_version(), but we make no attempt to
account for the fact that there may be mm_structs with stale ASIDs that
have versions which we now reuse due to the overflow & wrap around.
Encountering this requires that:
1) A struct mm_struct X is active on CPU A using ASID (V,n).
2) That mm is not used on CPU A for the length of time that it takes
for CPU A's asid_cache to overflow & wrap around to the same
version V that the mm had in step 1. During this time tasks using
the mm could either be sleeping or only scheduled on other CPUs.
3) Some other mm Y becomes active on CPU A and is allocated the same
ASID (V,n).
4) mm X now becomes active on CPU A again, and now incorrectly has the
same ASID as mm Y.
Where struct mm_struct ASIDs are represented above in the format
(version, EntryHi.ASID), and on a typical MIPS32 system version will be
24 bits wide & EntryHi.ASID will be 8 bits wide.
The length of time required in step 2 is highly dependent upon the CPU &
workload, but for a hypothetical 2GHz CPU running a workload which
generates a new ASID every 10000 cycles this period is around 248 days.
Due to this long period of time & the fact that tasks need to be
scheduled in just the right (or wrong, depending upon your inclination)
way, this is obviously a difficult bug to encounter but it's entirely
possible as evidenced by reports.
In order to fix this, simply extend ASIDs to 64 bits even on MIPS32
builds. This will extend the period of time required for the
hypothetical system above to encounter the problem from 28 days to
around 3 trillion years, which feels safely outside of the realms of
possibility.
The cost of this is slightly more generated code in some commonly
executed paths, but this is pretty minimal:
| Code Size Gain | Percentage
-----------------------|----------------|-------------
decstation_defconfig | +270 | +0.00%
32r2el_defconfig | +652 | +0.01%
32r6el_defconfig | +1000 | +0.01%
I have been unable to measure any change in performance of the LMbench
lat_ctx or lat_proc tests resulting from the 64b ASIDs on either
32r2el_defconfig+interAptiv or 32r6el_defconfig+I6500 systems.
Signed-off-by: Paul Burton <paul.burton(a)mips.com>
Suggested-by: James Hogan <jhogan(a)kernel.org>
References: https://lore.kernel.org/linux-mips/80B78A8B8FEE6145A87579E8435D78C30205D5F3…
References: https://lore.kernel.org/linux-mips/1488684260-18867-1-git-send-email-jiwei.…
Cc: Jiwei Sun <jiwei.sun(a)windriver.com>
Cc: Yu Huabing <yhb(a)ruijie.com.cn>
Cc: stable(a)vger.kernel.org # 2.6.12+
Cc: linux-mips(a)vger.kernel.org
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index a41059d47d31..ed7ffe4e63a3 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -50,7 +50,7 @@ struct guest_info {
#define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */
struct cpuinfo_mips {
- unsigned long asid_cache;
+ u64 asid_cache;
#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
unsigned long asid_mask;
#endif
diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index 0740be7d5d4a..24d6b42345fb 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -7,7 +7,7 @@
#include <linux/wait.h>
typedef struct {
- unsigned long asid[NR_CPUS];
+ u64 asid[NR_CPUS];
void *vdso;
atomic_t fp_mode_switching;
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 94414561de0e..a589585be21b 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -76,14 +76,14 @@ extern unsigned long pgd_current[];
* All unused by hardware upper bits will be considered
* as a software asid extension.
*/
-static unsigned long asid_version_mask(unsigned int cpu)
+static inline u64 asid_version_mask(unsigned int cpu)
{
unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
- return ~(asid_mask | (asid_mask - 1));
+ return ~(u64)(asid_mask | (asid_mask - 1));
}
-static unsigned long asid_first_version(unsigned int cpu)
+static inline u64 asid_first_version(unsigned int cpu)
{
return ~asid_version_mask(cpu) + 1;
}
@@ -102,14 +102,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void
get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
{
- unsigned long asid = asid_cache(cpu);
+ u64 asid = asid_cache(cpu);
if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
if (cpu_has_vtag_icache)
flush_icache_all();
local_flush_tlb_all(); /* start new asid cycle */
- if (!asid) /* fix version if needed */
- asid = asid_first_version(cpu);
}
cpu_context(cpu, mm) = asid_cache(cpu) = asid;
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 3466fcdae0ca..01848cdf2074 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -245,7 +245,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma,
pmd_t *pmdp;
pte_t *ptep;
- pr_debug("cpage[%08lx,%08lx]\n",
+ pr_debug("cpage[%08llx,%08lx]\n",
cpu_context(smp_processor_id(), mm), addr);
/* No ASID => no such page in the cache. */