We cannot proceed booting if the machine doesn't support the paging mode
kernel was compiled for.
Getting error the usual way -- via validate_cpu() -- is not going to
work. We need to enable appropriate paging mode before that, otherwise
kernel would triple-fault during KASLR setup.
This code will go away once we get support for boot-time switching
between paging modes.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [4.14+]
---
arch/x86/boot/compressed/misc.c | 16 ++++++++++++++++
arch/x86/boot/compressed/pgtable_64.c | 2 +-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..f7f8d9f76e15 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
}
}
+static int l5_supported(void)
+{
+ /* Check if leaf 7 is supported. */
+ if (native_cpuid_eax(0) < 7)
+ return 0;
+
+ /* Check if la57 is supported. */
+ return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init();
debug_putstr("early console in extract_kernel\n");
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+ error("This linux kernel as configured requires 5-level paging\n"
+ "This CPU does not support the required 'cr4.la57' feature\n"
+ "Unable to boot - please use a kernel appropriate for your CPU\n");
+ }
+
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index eed3a2c3b577..7bcf03b376da 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -2,7 +2,7 @@
int l5_paging_required(void)
{
- /* Check i leaf 7 is supported. */
+ /* Check if leaf 7 is supported. */
if (native_cpuid_eax(0) < 7)
return 0;
--
2.15.0
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d8a1a000555ecd1b824ac1ed6df8fe364dfbbbb0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust(a)primarydata.com>
Date: Fri, 3 Nov 2017 08:00:11 -0400
Subject: [PATCH] nfsd: Fix another OPEN stateid race
If nfsd4_process_open2() is initialising a new stateid, and yet the
call to nfs4_get_vfs_file() fails for some reason, then we must
declare the stateid closed, and unhash it before dropping the mutex.
Right now, we unhash the stateid after dropping the mutex, and without
changing the stateid type, meaning that another OPEN could theoretically
look it up and attempt to use it.
Reported-by: Andrew W Elble <aweits(a)rit.edu>
Signed-off-by: Trond Myklebust <trond.myklebust(a)primarydata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields(a)redhat.com>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ee8fde2dfa92..457f0e7ece74 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4502,6 +4502,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
struct nfs4_ol_stateid *stp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
+ bool new_stp = false;
/*
* Lookup file; if found, lookup stateid and check open request,
@@ -4521,11 +4522,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
goto out;
}
+ if (!stp) {
+ stp = init_open_stateid(fp, open);
+ if (!open->op_stp)
+ new_stp = true;
+ }
+
/*
* OPEN the file, or upgrade an existing OPEN.
* If truncate fails, the OPEN fails.
+ *
+ * stp is already locked.
*/
- if (stp) {
+ if (!new_stp) {
/* Stateid was found, this is an OPEN upgrade */
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
@@ -4533,22 +4542,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
goto out;
}
} else {
- /* stp is returned locked. */
- stp = init_open_stateid(fp, open);
- /* See if we lost the race to some other thread */
- if (stp->st_access_bmap != 0) {
- status = nfs4_upgrade_open(rqstp, fp, current_fh,
- stp, open);
- if (status) {
- mutex_unlock(&stp->st_mutex);
- goto out;
- }
- goto upgrade_out;
- }
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
- mutex_unlock(&stp->st_mutex);
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
@@ -4557,7 +4555,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (stp->st_clnt_odstate == open->op_odstate)
open->op_odstate = NULL;
}
-upgrade_out:
+
nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
mutex_unlock(&stp->st_mutex);
On Mon, Dec 04, 2017 at 04:47:00PM +0100, Christian Hesse wrote:
> Amit Pundir <amit.pundir(a)linaro.org> on Mon, 2017/11/27 18:23:
> > Hi Greg,
> >
> > Found few e100e upstream fixes from Benjamin Poirier in lede
> > source tree, https://git.lede-project.org/?p=source.git, and
> > these fixes seem reasonable enough for 4.14.y too.
> >
> > Also submitting an e1000e buffer overrun fix by Sasha Neftin.
> >
> > Cherry-picked and build tested for linux v4.14.2 for ARCH=arm/arm64.
> >
> > Regards,
> > Amit Pundir
> >
> >
> > Benjamin Poirier (4):
> > e1000e: Fix error path in link detection
> > e1000e: Fix return value test
> > e1000e: Separate signaling for link check/link up
> > e1000e: Avoid receiver overrun interrupt bursts
> >
> > Sasha Neftin (1):
> > e1000e: fix buffer overrun while the I219 is processing DMA
> > transactions
>
> Hello everybody,
>
> looks like one of these breaks connectivity on my Thinkpad X250.
> Just downgraded to linux 4.14.2 to verify.
Can you try the -rc release I just did? It has a fix for this series in
it.
thanks,
greg k-h
Changes since v2 [1]:
* Add a comment for the vma_is_fsdax() check in get_vaddr_frames() (Jan)
* Collect Jan's Reviewed-by.
* Rebased on v4.15-rc1
[1]: https://lists.01.org/pipermail/linux-nvdimm/2017-November/013295.html
The summary text below is unchanged from v2.
---
Andrew,
Here is a new get_user_pages api for cases where a driver intends to
keep an elevated page count indefinitely. This is distinct from usages
like iov_iter_get_pages where the elevated page counts are transient.
The iov_iter_get_pages cases immediately turn around and submit the
pages to a device driver which will put_page when the i/o operation
completes (under kernel control).
In the longterm case userspace is responsible for dropping the page
reference at some undefined point in the future. This is untenable for
filesystem-dax case where the filesystem is in control of the lifetime
of the block / page and needs reasonable limits on how long it can wait
for pages in a mapping to become idle.
Fixing filesystems to actually wait for dax pages to be idle before
blocks from a truncate/hole-punch operation are repurposed is saved for
a later patch series.
Also, allowing longterm registration of dax mappings is a future patch
series that introduces a "map with lease" semantic where the kernel can
revoke a lease and force userspace to drop its page references.
I have also tagged these for -stable to purposely break cases that might
assume that longterm memory registrations for filesystem-dax mappings
were supported by the kernel. The behavior regression this policy change
implies is one of the reasons we maintain the "dax enabled. Warning:
EXPERIMENTAL, use at your own risk" notification when mounting a
filesystem in dax mode.
It is worth noting the device-dax interface does not suffer the same
constraints since it does not support file space management operations
like hole-punch.
---
Dan Williams (4):
mm: introduce get_user_pages_longterm
mm: fail get_vaddr_frames() for filesystem-dax mappings
[media] v4l2: disable filesystem-dax mapping support
IB/core: disable memory registration of fileystem-dax vmas
drivers/infiniband/core/umem.c | 2 -
drivers/media/v4l2-core/videobuf-dma-sg.c | 5 +-
include/linux/fs.h | 14 ++++++
include/linux/mm.h | 13 ++++++
mm/frame_vector.c | 12 +++++
mm/gup.c | 64 +++++++++++++++++++++++++++++
6 files changed, 107 insertions(+), 3 deletions(-)
From: Christoffer Dall <christoffer.dall(a)linaro.org>
We are incorrectly rearranging 32-bit words inside a 64-bit typed value
for big endian systems, which would result in never marking a virtual
interrupt as inactive on big endian systems (assuming 32 or fewer LRs on
the hardware). Fix this by not doing any word order manipulation for
the typed values.
Cc: <stable(a)vger.kernel.org>
Acked-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
virt/kvm/arm/hyp/vgic-v2-sr.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index a3f18d362366..d7fd46fe9efb 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -34,11 +34,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
else
elrsr1 = 0;
-#ifdef CONFIG_CPU_BIG_ENDIAN
- cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
}
static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
--
2.14.2
From: Marc Zyngier <marc.zyngier(a)arm.com>
VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 39-bit addresses (instead of 40-bit) and also
insufficiently checking the alignment. This patch fixes it.
This patch is the 32bit pendent of Kristina's arm64 fix, and
she deserves the actual kudos for pinpointing that one.
Fixes: f7ed45be3ba52 ("KVM: ARM: World-switch implementation")
Cc: <stable(a)vger.kernel.org> # 3.9
Reported-by: Kristina Martsenko <kristina.martsenko(a)arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
arch/arm/include/asm/kvm_arm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c8781450905b..3ab8b3781bfe 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT _AC(48, ULL)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
--
2.14.2
From: Kristina Martsenko <kristina.martsenko(a)arm.com>
VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 47-bit addresses (instead of 48-bit) and also
insufficiently checking the alignment. This patch fixes it.
As an example, with 4k pages, before this patch we have:
PHYS_MASK_SHIFT = 48
VTTBR_X = 37 - 24 = 13
VTTBR_BADDR_SHIFT = 13 - 1 = 12
VTTBR_BADDR_MASK = ((1 << 35) - 1) << 12 = 0x00007ffffffff000
Which is wrong, because the mask doesn't allow bit 47 of the VTTBR
address to be set, and only requires the address to be 12-bit (4k)
aligned, while it actually needs to be 13-bit (8k) aligned because we
concatenate two 4k tables.
With this patch, the mask becomes 0x0000ffffffffe000, which is what we
want.
Fixes: 0369f6a34b9f ("arm64: KVM: EL2 register definitions")
Cc: <stable(a)vger.kernel.org> # 3.11.x
Reviewed-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Kristina Martsenko <kristina.martsenko(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
arch/arm64/include/asm/kvm_arm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7f069ff37f06..715d395ef45b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -170,8 +170,7 @@
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
--
2.14.2
From: Marc Zyngier <marc.zyngier(a)arm.com>
The current pending table parsing code assumes that we keep the
previous read of the pending bits, but keep that variable in
the current block, making sure it is discarded on each loop.
We end-up using whatever is on the stack. Who knows, it might
just be the right thing...
Fixes: 33d3bc9556a7d ("KVM: arm64: vgic-its: Read initial LPI pending table")
Cc: <stable(a)vger.kernel.org> # 4.8
Reported-by: AKASHI Takahiro <takahiro.akashi(a)linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
virt/kvm/arm/vgic/vgic-its.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 1f761a9991e7..cb2d0a2dbe5a 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -421,6 +421,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
u32 *intids;
int nr_irqs, i;
unsigned long flags;
+ u8 pendmask;
nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
if (nr_irqs < 0)
@@ -428,7 +429,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
for (i = 0; i < nr_irqs; i++) {
int byte_offset, bit_nr;
- u8 pendmask;
byte_offset = intids[i] / BITS_PER_BYTE;
bit_nr = intids[i] % BITS_PER_BYTE;
--
2.14.2