The Dell Inspiron 5775 is a Raven Ridge. The Enable Slot command times
out when a USB device gets plugged in:
[ 212.156326] xhci_hcd 0000:03:00.3: Error while assigning device slot ID
[ 212.156340] xhci_hcd 0000:03:00.3: Max number of devices this xHCI host supports is 64.
[ 212.156348] usb usb2-port3: couldn't allocate usb_device
AMD suggests that a delay before xHC suspends can fix the issue.
I can confirm it fixes the issue, so use the suspend delay quirk for
Raven Ridge's xHC.
Cc: stable(a)vger.kernel.org
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
---
drivers/usb/host/xhci-pci.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index d9f831b67e57..93ce34bce7b5 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -126,7 +126,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
xhci->quirks |= XHCI_AMD_PLL_FIX;
- if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x43bb)
+ if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+ (pdev->device == 0x15e0 ||
+ pdev->device == 0x15e1 ||
+ pdev->device == 0x43bb))
xhci->quirks |= XHCI_SUSPEND_DELAY;
if (pdev->vendor == PCI_VENDOR_ID_AMD)
--
2.15.1
This is a note to let you know that I've just added the patch titled
virtio_net: check return value of skb_to_sgvec always
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
virtio_net-check-return-value-of-skb_to_sgvec-always.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
From e2fcad58fd230f635a74e4e983c6f4ea893642d2 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Sun, 4 Jun 2017 04:16:26 +0200
Subject: virtio_net: check return value of skb_to_sgvec always
From: Jason A. Donenfeld <Jason(a)zx2c4.com>
commit e2fcad58fd230f635a74e4e983c6f4ea893642d2 upstream.
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Reviewed-by: Sergei Shtylyov <sergei.shtylyov(a)cogentembedded.com>
Cc: "Michael S. Tsirkin" <mst(a)redhat.com>
Cc: Jason Wang <jasowang(a)redhat.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Nathan Chancellor <natechancellor(a)gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/virtio_net.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -831,7 +831,7 @@ static int xmit_skb(struct send_queue *s
struct virtio_net_hdr_mrg_rxbuf *hdr;
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
struct virtnet_info *vi = sq->vq->vdev->priv;
- unsigned num_sg;
+ int num_sg;
unsigned hdr_len = vi->hdr_len;
bool can_push;
@@ -858,11 +858,16 @@ static int xmit_skb(struct send_queue *s
if (can_push) {
__skb_push(skb, hdr_len);
num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
+ if (unlikely(num_sg < 0))
+ return num_sg;
/* Pull header back to avoid skew in tx bytes calculations. */
__skb_pull(skb, hdr_len);
} else {
sg_set_buf(sq->sg, hdr, hdr_len);
- num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+ num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
+ if (unlikely(num_sg < 0))
+ return num_sg;
+ num_sg++;
}
return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
}
Patches currently in stable-queue which might be from Jason(a)zx2c4.com are
queue-4.9/ipsec-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/skbuff-return-emsgsize-in-skb_to_sgvec-to-prevent-overflow.patch
queue-4.9/rxrpc-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/virtio_net-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/macsec-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/virtio_net-check-return-value-of-skb_to_sgvec-in-one-more-location.patch
This is a note to let you know that I've just added the patch titled
rxrpc: check return value of skb_to_sgvec always
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
rxrpc-check-return-value-of-skb_to_sgvec-always.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
From 89a5ea99662505d2d61f2a3030a6896c2cb3cdb0 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Sun, 4 Jun 2017 04:16:24 +0200
Subject: rxrpc: check return value of skb_to_sgvec always
From: Jason A. Donenfeld <Jason(a)zx2c4.com>
commit 89a5ea99662505d2d61f2a3030a6896c2cb3cdb0 upstream.
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Acked-by: David Howells <dhowells(a)redhat.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Nathan Chancellor <natechancellor(a)gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/rxrpc/rxkad.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -229,7 +229,9 @@ static int rxkad_secure_packet_encrypt(c
len &= ~(call->conn->size_align - 1);
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, 0, len);
+ err = skb_to_sgvec(skb, sg, 0, len);
+ if (unlikely(err < 0))
+ goto out;
skcipher_request_set_crypt(req, sg, sg, len, iv.x);
crypto_skcipher_encrypt(req);
@@ -325,7 +327,7 @@ static int rxkad_verify_packet_1(struct
struct sk_buff *trailer;
u32 data_size, buf;
u16 check;
- int nsg;
+ int nsg, ret;
_enter("");
@@ -342,7 +344,9 @@ static int rxkad_verify_packet_1(struct
goto nomem;
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, offset, 8);
+ ret = skb_to_sgvec(skb, sg, offset, 8);
+ if (unlikely(ret < 0))
+ return ret;
/* start the decryption afresh */
memset(&iv, 0, sizeof(iv));
@@ -405,7 +409,7 @@ static int rxkad_verify_packet_2(struct
struct sk_buff *trailer;
u32 data_size, buf;
u16 check;
- int nsg;
+ int nsg, ret;
_enter(",{%d}", skb->len);
@@ -429,7 +433,12 @@ static int rxkad_verify_packet_2(struct
}
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, offset, len);
+ ret = skb_to_sgvec(skb, sg, offset, len);
+ if (unlikely(ret < 0)) {
+ if (sg != _sg)
+ kfree(sg);
+ return ret;
+ }
/* decrypt from the session key */
token = call->conn->params.key->payload.data[0];
Patches currently in stable-queue which might be from Jason(a)zx2c4.com are
queue-4.9/ipsec-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/skbuff-return-emsgsize-in-skb_to_sgvec-to-prevent-overflow.patch
queue-4.9/rxrpc-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/virtio_net-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/macsec-check-return-value-of-skb_to_sgvec-always.patch
queue-4.9/virtio_net-check-return-value-of-skb_to_sgvec-in-one-more-location.patch
From: Ian W MORRISON <ianwmorrison(a)gmail.com>
As the Geminilake firmware is now merged to linux-firmware.git
use MODULE_FIRMWARE to load the firmware.
This removes the error message in the dmesg log:
i915 0000:00:02.0: Direct firmware load for
i915/glk_dmc_ver1_04.bin failed with error -2
i915 0000:00:02.0: Failed to load DMC firmware
i915/glk_dmc_ver1_04.bin. Disabling runtime power management.
i915 0000:00:02.0: DMC firmware homepage:
https://01.org/linuxgraphics/downloads/firmware
and now shows that the firmware has correctly loaded:
[drm] Finished loading DMC firmware i915/glk_dmc_ver1_04.bin (v1.4)
Cc: stable(a)vger.kernel.org
Signed-off-by: Ian W MORRISON <ianwmorrison(a)gmail.com>
---
drivers/gpu/drm/i915/intel_csr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 41e6c75a7f3c..f9550ea46c26 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -35,6 +35,7 @@
*/
#define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin"
+MODULE_FIRMWARE(I915_CSR_GLK);
#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4)
#define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin"
--
2.11.0
This was sent to stable(a)vger.kernel.org almost 2 weeks ago and I haven't
heard anything back yet. Is there any update on the status? (sorry for
the bad formatting, this is the only way I can forward the email. I can
only resend the original email before the commit email from Thomas.)
Thanks.
-------- Forwarded Message --------
Subject: [tip:x86/urgent] x86/platform/UV: Fix critical UV MMR address error
Date: Wed, 28 Mar 2018 11:25:04 -0700
From: tip-bot for mike.travis(a)hpe.com <tipbot(a)zytor.com>
Reply-To: andrew.banman(a)hpe.com, hpa(a)zytor.com, tglx(a)linutronix.de,
mingo(a)kernel.org, russ.anderson(a)hpe.com, travis(a)sgi.com,
linux-kernel(a)vger.kernel.org, mike.travis(a)hpe.com, dimitri.sivanich(a)hpe.com
To: linux-tip-commits(a)vger.kernel.org
CC: dimitri.sivanich(a)hpe.com, mike.travis(a)hpe.com,
linux-kernel(a)vger.kernel.org, mingo(a)kernel.org, hpa(a)zytor.com,
tglx(a)linutronix.de, andrew.banman(a)hpe.com, russ.anderson(a)hpe.com,
travis(a)sgi.com
Commit-ID: bd47a85acd727e27b7283daff557865ad04c59f6
Gitweb:
https://git.kernel.org/tip/bd47a85acd727e27b7283daff557865ad04c59f6
Author: mike.travis(a)hpe.com <mike.travis(a)hpe.com>
AuthorDate: Wed, 28 Mar 2018 12:40:11 -0500
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitDate: Wed, 28 Mar 2018 20:19:45 +0200
x86/platform/UV: Fix critical UV MMR address error
A critical error was found testing the fixed UV4 HUB in that an MMR address
was found to be incorrect. This causes the virtual address space for
accessing the MMIOH1 region to be allocated with the incorrect size.
Fixes: 673aa20c55a1 ("x86/platform/UV: Update uv_mmrs.h to prepare for
UV4A fixes")
Signed-off-by: Mike Travis <travis(a)sgi.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Dimitri Sivanich <dimitri.sivanich(a)hpe.com>
Cc: Russ Anderson <russ.anderson(a)hpe.com>
Cc: Andrew Banman <andrew.banman(a)hpe.com>
Link:
https://lkml.kernel.org/r/20180328174011.041801248@stormcage.americas.sgi.c…
---
arch/x86/include/asm/uv/uv_mmrs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h
b/arch/x86/include/asm/uv/uv_mmrs.h
index ecb9ddef128f..62c79e26a59a 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3833,7 +3833,7 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR
uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR
uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \
is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
The patch titled
Subject: ipc/shm: fix use-after-free of shm file via remap_file_pages()
has been added to the -mm tree. Its filename is
ipc-shm-fix-use-after-free-of-shm-file-via-remap_file_pages.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/ipc-shm-fix-use-after-free-of-shm-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/ipc-shm-fix-use-after-free-of-shm-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Eric Biggers <ebiggers(a)google.com>
Subject: ipc/shm: fix use-after-free of shm file via remap_file_pages()
syzbot reported a use-after-free of shm_file_data(file)->file->f_op in
shm_get_unmapped_area(), called via sys_remap_file_pages(). Unfortunately
it couldn't generate a reproducer, but I found a bug which I think caused
it. When remap_file_pages() is passed a full System V shared memory
segment, the memory is first unmapped, then a new map is created using the
->vm_file. Between these steps, the shm ID can be removed and reused for
a new shm segment. But, shm_mmap() only checks whether the ID is
currently valid before calling the underlying file's ->mmap(); it doesn't
check whether it was reused. Thus it can use the wrong underlying file,
one that was already freed.
Fix this by making the "outer" shm file (the one that gets put in
->vm_file) hold a reference to the real shm file, and by making
__shm_open() require that the file associated with the shm ID matches the
one associated with the "outer" file.
Commit 1ac0b6dec656 ("ipc/shm: handle removed segments gracefully in
shm_mmap()") almost fixed this bug, but it didn't go far enough because it
didn't consider the case where the shm ID is reused.
The following program usually reproduces this bug:
#include <stdlib.h>
#include <sys/shm.h>
#include <sys/syscall.h>
#include <unistd.h>
int main()
{
int is_parent = (fork() != 0);
srand(getpid());
for (;;) {
int id = shmget(0xF00F, 4096, IPC_CREAT|0700);
if (is_parent) {
void *addr = shmat(id, NULL, 0);
usleep(rand() % 50);
while (!syscall(__NR_remap_file_pages, addr, 4096, 0, 0, 0));
} else {
usleep(rand() % 50);
shmctl(id, IPC_RMID, NULL);
}
}
}
It causes the following NULL pointer dereference due to a 'struct file'
being used while it's being freed. (I couldn't actually get a KASAN
use-after-free splat like in the syzbot report. But I think it's possible
with this bug; it would just take a more extraordinary race...)
BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
PGD 0 P4D 0
Oops: 0000 [#1] SMP NOPTI
CPU: 9 PID: 258 Comm: syz_ipc Not tainted 4.16.0-05140-gf8cf2f16a7c95 #189
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014
RIP: 0010:d_inode include/linux/dcache.h:519 [inline]
RIP: 0010:touch_atime+0x25/0xd0 fs/inode.c:1724
[...]
Call Trace:
file_accessed include/linux/fs.h:2063 [inline]
shmem_mmap+0x25/0x40 mm/shmem.c:2149
call_mmap include/linux/fs.h:1789 [inline]
shm_mmap+0x34/0x80 ipc/shm.c:465
call_mmap include/linux/fs.h:1789 [inline]
mmap_region+0x309/0x5b0 mm/mmap.c:1712
do_mmap+0x294/0x4a0 mm/mmap.c:1483
do_mmap_pgoff include/linux/mm.h:2235 [inline]
SYSC_remap_file_pages mm/mmap.c:2853 [inline]
SyS_remap_file_pages+0x232/0x310 mm/mmap.c:2769
do_syscall_64+0x64/0x1a0 arch/x86/entry/common.c:287
entry_SYSCALL_64_after_hwframe+0x42/0xb7
Link: http://lkml.kernel.org/r/20180409043039.28915-1-ebiggers3@gmail.com
Reported-by: syzbot+d11f321e7f1923157eac80aa990b446596f46439(a)syzkaller.appspotmail.com
Fixes: c8d78c1823f4 ("mm: replace remap_file_pages() syscall with emulation")
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Manfred Spraul <manfred(a)colorfullife.com>
Cc: "Eric W . Biederman" <ebiederm(a)xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
diff -puN ipc/shm.c~ipc-shm-fix-use-after-free-of-shm-file-via-remap_file_pages ipc/shm.c
--- a/ipc/shm.c~ipc-shm-fix-use-after-free-of-shm-file-via-remap_file_pages
+++ a/ipc/shm.c
@@ -225,6 +225,12 @@ static int __shm_open(struct vm_area_str
if (IS_ERR(shp))
return PTR_ERR(shp);
+ if (shp->shm_file != sfd->file) {
+ /* ID was reused */
+ shm_unlock(shp);
+ return -EINVAL;
+ }
+
shp->shm_atim = ktime_get_real_seconds();
ipc_update_pid(&shp->shm_lprid, task_tgid(current));
shp->shm_nattch++;
@@ -455,8 +461,9 @@ static int shm_mmap(struct file *file, s
int ret;
/*
- * In case of remap_file_pages() emulation, the file can represent
- * removed IPC ID: propogate shm_lock() error to caller.
+ * In case of remap_file_pages() emulation, the file can represent an
+ * IPC ID that was removed, and possibly even reused by another shm
+ * segment already. Propagate this case as an error to caller.
*/
ret = __shm_open(vma);
if (ret)
@@ -480,6 +487,7 @@ static int shm_release(struct inode *ino
struct shm_file_data *sfd = shm_file_data(file);
put_ipc_ns(sfd->ns);
+ fput(sfd->file);
shm_file_data(file) = NULL;
kfree(sfd);
return 0;
@@ -1432,7 +1440,7 @@ long do_shmat(int shmid, char __user *sh
file->f_mapping = shp->shm_file->f_mapping;
sfd->id = shp->shm_perm.id;
sfd->ns = get_ipc_ns(ns);
- sfd->file = shp->shm_file;
+ sfd->file = get_file(shp->shm_file);
sfd->vm_ops = NULL;
err = security_mmap_file(file, prot, flags);
_
Patches currently in -mm which might be from ebiggers(a)google.com are
ipc-shm-fix-use-after-free-of-shm-file-via-remap_file_pages.patch
The patch titled
Subject: get_user_pages_fast(): return -EFAULT on access_ok failure
has been added to the -mm tree. Its filename is
gup-return-efault-on-access_ok-failure.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/gup-return-efault-on-access_ok-fai…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/gup-return-efault-on-access_ok-fai…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: "Michael S. Tsirkin" <mst(a)redhat.com>
Subject: get_user_pages_fast(): return -EFAULT on access_ok failure
get_user_pages_fast is supposed to be a faster drop-in equivalent of
get_user_pages. As such, callers expect it to return a negative return
code when passed an invalid address, and never expect it to return 0 when
passed a positive number of pages, since its documentation says:
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno.
When get_user_pages_fast falls back on get_user_pages this is exactly what
happens. Unfortunately the implementation is inconsistent: it returns 0
if passed a kernel address, confusing callers: for example, the following
is pretty common but does not appear to do the right thing with a kernel
address:
ret = get_user_pages_fast(addr, 1, writeable, &page);
if (ret < 0)
return ret;
Change get_user_pages_fast to return -EFAULT when supplied a kernel
address to make it match expectations.
All callers have been audited for consistency with the documented
semantics.
Link: http://lkml.kernel.org/r/1522962072-182137-4-git-send-email-mst@redhat.com
Fixes: 5b65c4677a57 ("mm, x86/mm: Fix performance regression in get_user_pages_fast()")
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
Reported-by: syzbot+6304bf97ef436580fede(a)syzkaller.appspotmail.com
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Thorsten Leemhuis <regressions(a)leemhuis.info>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
diff -puN mm/gup.c~gup-return-efault-on-access_ok-failure mm/gup.c
--- a/mm/gup.c~gup-return-efault-on-access_ok-failure
+++ a/mm/gup.c
@@ -1806,9 +1806,12 @@ int get_user_pages_fast(unsigned long st
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
+ if (nr_pages <= 0)
+ return 0;
+
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
(void __user *)start, len)))
- return 0;
+ return -EFAULT;
if (gup_fast_permitted(start, nr_pages, write)) {
local_irq_disable();
_
Patches currently in -mm which might be from mst(a)redhat.com are
mm-gup_benchmark-handle-gup-failures.patch
gup-return-efault-on-access_ok-failure.patch
mm-gup-document-return-value.patch
The patch titled
Subject: mm-gup_benchmark-handle-gup-failures-fix
has been added to the -mm tree. Its filename is
mm-gup_benchmark-handle-gup-failures-fix.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-gup_benchmark-handle-gup-failur…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-gup_benchmark-handle-gup-failur…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Andrew Morton <akpm(a)linux-foundation.org>
Subject: mm-gup_benchmark-handle-gup-failures-fix
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: "Michael S. Tsirkin" <mst(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Thorsten Leemhuis <regressions(a)leemhuis.info>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
diff -puN mm/gup_benchmark.c~mm-gup_benchmark-handle-gup-failures-fix mm/gup_benchmark.c
--- a/mm/gup_benchmark.c~mm-gup_benchmark-handle-gup-failures-fix
+++ a/mm/gup_benchmark.c
@@ -41,8 +41,9 @@ static int __gup_benchmark_ioctl(unsigne
}
nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i);
- if (nr > 0)
- i += nr;
+ if (nr <= 0)
+ break;
+ i += nr;
}
end_time = ktime_get();
_
Patches currently in -mm which might be from akpm(a)linux-foundation.org are
i-need-old-gcc.patch
mm-gup_benchmark-handle-gup-failures-fix.patch
arm-arch-arm-include-asm-pageh-needs-personalityh.patch
ocfs2-without-quota-support-try-to-avoid-calling-quota-recovery-checkpatch-fixes.patch
mm.patch
z3fold-fix-memory-leak-fix.patch
list_lru-prefetch-neighboring-list-entries-before-acquiring-lock-fix.patch
mm-oom-cgroup-aware-oom-killer-fix.patch
mm-oom-docs-describe-the-cgroup-aware-oom-killer-fix-2-fix.patch
proc-add-seq_put_decimal_ull_width-to-speed-up-proc-pid-smaps-fix.patch
fs-reiserfs-journalc-add-missing-resierfs_warning-arg.patch
ipc-shmc-shm_split-remove-unneeded-test-for-null-shm_file_datavm_ops.patch
linux-next-rejects.patch
linux-next-git-rejects.patch
linux-next-fixup.patch
fs-fsnotify-account-fsnotify-metadata-to-kmemcg-fix.patch
kernel-forkc-export-kernel_thread-to-modules.patch
slab-leaks3-default-y.patch
The patch titled
Subject: mm/gup_benchmark: handle gup failures
has been added to the -mm tree. Its filename is
mm-gup_benchmark-handle-gup-failures.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-gup_benchmark-handle-gup-failur…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-gup_benchmark-handle-gup-failur…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: "Michael S. Tsirkin" <mst(a)redhat.com>
Subject: mm/gup_benchmark: handle gup failures
Patch series "mm/get_user_pages_fast fixes, cleanups", v2.
Turns out get_user_pages_fast and __get_user_pages_fast return different
values on error when given a single page: __get_user_pages_fast returns 0.
get_user_pages_fast returns either 0 or an error.
Callers of get_user_pages_fast expect an error so fix it up to return an
error consistently.
Stress the difference between get_user_pages_fast and
__get_user_pages_fast to make sure callers aren't confused.
This patch (of 3):
__gup_benchmark_ioctl does not handle the case where get_user_pages_fast
fails:
- a negative return code will cause a buffer overrun
- returning with partial success will cause use of
uninitialized memory.
Link: http://lkml.kernel.org/r/1522962072-182137-3-git-send-email-mst@redhat.com
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Thorsten Leemhuis <regressions(a)leemhuis.info>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
diff -puN mm/gup_benchmark.c~mm-gup_benchmark-handle-gup-failures mm/gup_benchmark.c
--- a/mm/gup_benchmark.c~mm-gup_benchmark-handle-gup-failures
+++ a/mm/gup_benchmark.c
@@ -23,7 +23,7 @@ static int __gup_benchmark_ioctl(unsigne
struct page **pages;
nr_pages = gup->size / PAGE_SIZE;
- pages = kvmalloc(sizeof(void *) * nr_pages, GFP_KERNEL);
+ pages = kvzalloc(sizeof(void *) * nr_pages, GFP_KERNEL);
if (!pages)
return -ENOMEM;
@@ -41,7 +41,8 @@ static int __gup_benchmark_ioctl(unsigne
}
nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i);
- i += nr;
+ if (nr > 0)
+ i += nr;
}
end_time = ktime_get();
_
Patches currently in -mm which might be from mst(a)redhat.com are
mm-gup_benchmark-handle-gup-failures.patch
gup-return-efault-on-access_ok-failure.patch
mm-gup-document-return-value.patch
The patch titled
Subject: resource: fix integer overflow at reallocation
has been added to the -mm tree. Its filename is
resource-fix-integer-overflow-at-reallocation.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/resource-fix-integer-overflow-at-r…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/resource-fix-integer-overflow-at-r…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Takashi Iwai <tiwai(a)suse.de>
Subject: resource: fix integer overflow at reallocation
We've got a bug report indicating a kernel panic at booting on an x86-32
system, and it turned out to be the invalid resource assigned after PCI
resource reallocation. __find_resource() first aligns the resource start
address and resets the end address with start+size-1 accordingly, then
checks whether it's contained. Here the end address may overflow the
integer, although resource_contains() still returns true because the
function validates only start and end address. So this ends up with
returning an invalid resource (start > end).
There was already an attempt to cover such a problem in the commit
47ea91b4052d ("Resource: fix wrong resource window calculation"), but this
case is an overlooked one.
This patch adds the validity check in resource_contains() to see whether
the given resource has a valid range for avoiding the integer overflow
problem.
Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1086739
Link: http://lkml.kernel.org/r/20180408072026.27365-1-tiwai@suse.de
Fixes: 23c570a67448 ("resource: ability to resize an allocated resource")
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Reported-by: Michael Henders <hendersm(a)shaw.ca>
Tested-by: Michael Henders <hendersm(a)shaw.ca>
Reviewed-by: Ram Pai <linuxram(a)us.ibm.com>
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
diff -puN include/linux/ioport.h~resource-fix-integer-overflow-at-reallocation include/linux/ioport.h
--- a/include/linux/ioport.h~resource-fix-integer-overflow-at-reallocation
+++ a/include/linux/ioport.h
@@ -212,6 +212,9 @@ static inline bool resource_contains(str
return false;
if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET)
return false;
+ /* sanity check whether it's a valid resource range */
+ if (r2->end < r2->start)
+ return false;
return r1->start <= r2->start && r1->end >= r2->end;
}
_
Patches currently in -mm which might be from tiwai(a)suse.de are
resource-fix-integer-overflow-at-reallocation.patch
The patch titled
Subject: mm: hwpoison: disable memory error handling on 1GB hugepage
has been removed from the -mm tree. Its filename was
mm-hwpoison-disable-memory-error-handling-on-1gb-hugepage.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Subject: mm: hwpoison: disable memory error handling on 1GB hugepage
Recently the following BUG was reported:
Injecting memory failure for pfn 0x3c0000 at process virtual address 0x7fe300000000
Memory failure: 0x3c0000: recovery action for huge page: Recovered
BUG: unable to handle kernel paging request at ffff8dfcc0003000
IP: gup_pgd_range+0x1f0/0xc20
PGD 17ae72067 P4D 17ae72067 PUD 0
Oops: 0000 [#1] SMP PTI
...
CPU: 3 PID: 5467 Comm: hugetlb_1gb Not tainted 4.15.0-rc8-mm1-abc+ #3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014
You can easily reproduce this by calling madvise(MADV_HWPOISON) twice on a
1GB hugepage. This happens because get_user_pages_fast() is not aware of
a migration entry on pud that was created in the 1st madvise() event.
I think that conversion to pud-aligned migration entry is working, but
other MM code walking over page table isn't prepared for it. We need some
time and effort to make all this work properly, so this patch avoids the
reported bug by just disabling error handling for 1GB hugepage.
[n-horiguchi(a)ah.jp.nec.com: v2]
Link: http://lkml.kernel.org/r/1517284444-18149-1-git-send-email-n-horiguchi@ah.j…
Link: http://lkml.kernel.org/r/1517207283-15769-1-git-send-email-n-horiguchi@ah.j…
Signed-off-by: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Acked-by: Punit Agrawal <punit.agrawal(a)arm.com>
Tested-by: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Anshuman Khandual <khandual(a)linux.vnet.ibm.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/mm.h | 1 +
mm/memory-failure.c | 16 ++++++++++++++++
2 files changed, 17 insertions(+)
diff -puN include/linux/mm.h~mm-hwpoison-disable-memory-error-handling-on-1gb-hugepage include/linux/mm.h
--- a/include/linux/mm.h~mm-hwpoison-disable-memory-error-handling-on-1gb-hugepage
+++ a/include/linux/mm.h
@@ -2613,6 +2613,7 @@ enum mf_action_page_type {
MF_MSG_POISONED_HUGE,
MF_MSG_HUGE,
MF_MSG_FREE_HUGE,
+ MF_MSG_NON_PMD_HUGE,
MF_MSG_UNMAP_FAILED,
MF_MSG_DIRTY_SWAPCACHE,
MF_MSG_CLEAN_SWAPCACHE,
diff -puN mm/memory-failure.c~mm-hwpoison-disable-memory-error-handling-on-1gb-hugepage mm/memory-failure.c
--- a/mm/memory-failure.c~mm-hwpoison-disable-memory-error-handling-on-1gb-hugepage
+++ a/mm/memory-failure.c
@@ -502,6 +502,7 @@ static const char * const action_page_ty
[MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned",
[MF_MSG_HUGE] = "huge page",
[MF_MSG_FREE_HUGE] = "free huge page",
+ [MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page",
[MF_MSG_UNMAP_FAILED] = "unmapping failed page",
[MF_MSG_DIRTY_SWAPCACHE] = "dirty swapcache page",
[MF_MSG_CLEAN_SWAPCACHE] = "clean swapcache page",
@@ -1084,6 +1085,21 @@ static int memory_failure_hugetlb(unsign
return 0;
}
+ /*
+ * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
+ * simply disable it. In order to make it work properly, we need
+ * make sure that:
+ * - conversion of a pud that maps an error hugetlb into hwpoison
+ * entry properly works, and
+ * - other mm code walking over page table is aware of pud-aligned
+ * hwpoison entries.
+ */
+ if (huge_page_size(page_hstate(head)) > PMD_SIZE) {
+ action_result(pfn, MF_MSG_NON_PMD_HUGE, MF_IGNORED);
+ res = -EBUSY;
+ goto out;
+ }
+
if (!hwpoison_user_mappings(p, pfn, flags, &head)) {
action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
res = -EBUSY;
_
Patches currently in -mm which might be from n-horiguchi(a)ah.jp.nec.com are
The patch titled
Subject: hugetlbfs: fix bug in pgoff overflow checking
has been removed from the -mm tree. Its filename was
hugetlbfs-fix-bug-in-pgoff-overflow-checking.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: fix bug in pgoff overflow checking
This is a fix for a regression in 32 bit kernels caused by an invalid
check for pgoff overflow in hugetlbfs mmap setup. The check incorrectly
specified that the size of a loff_t was the same as the size of a long.
The regression prevents mapping hugetlbfs files at offsets greater than
4GB on 32 bit kernels.
On 32 bit kernels conversion from a page based unsigned long can not
overflow a loff_t byte offset. Therefore, skip this check if
sizeof(unsigned long) != sizeof(loff_t).
Link: http://lkml.kernel.org/r/20180330145402.5053-1-mike.kravetz@oracle.com
Fixes: 63489f8e8211 ("hugetlbfs: check for pgoff value overflow")
Reported-by: Dan Rue <dan.rue(a)linaro.org>
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Tested-by: Anders Roxell <anders.roxell(a)linaro.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Yisheng Xie <xieyisheng1(a)huawei.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Nic Losby <blurbdust(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/hugetlbfs/inode.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff -puN fs/hugetlbfs/inode.c~hugetlbfs-fix-bug-in-pgoff-overflow-checking fs/hugetlbfs/inode.c
--- a/fs/hugetlbfs/inode.c~hugetlbfs-fix-bug-in-pgoff-overflow-checking
+++ a/fs/hugetlbfs/inode.c
@@ -138,10 +138,14 @@ static int hugetlbfs_file_mmap(struct fi
/*
* page based offset in vm_pgoff could be sufficiently large to
- * overflow a (l)off_t when converted to byte offset.
+ * overflow a loff_t when converted to byte offset. This can
+ * only happen on architectures where sizeof(loff_t) ==
+ * sizeof(unsigned long). So, only check in those instances.
*/
- if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
- return -EINVAL;
+ if (sizeof(unsigned long) == sizeof(loff_t)) {
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ return -EINVAL;
+ }
/* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
If a completion occurs after blk_mq_rq_timed_out() has reset
rq->aborted_gstate and the request is again in flight when the timeout
expires then a request will be completed twice: a first time by the
timeout handler and a second time when the regular completion occurs.
Additionally, the blk-mq timeout handling code ignores completions that
occur after blk_mq_check_expired() has been called and before
blk_mq_rq_timed_out() has reset rq->aborted_gstate. If a block driver
timeout handler always returns BLK_EH_RESET_TIMER then the result will
be that the request never terminates.
Since the request state can be updated from two different contexts,
namely regular completion and request timeout, this race cannot be
fixed with RCU synchronization only. Fix this race as follows:
- Introduce a spinlock to protect the request state and deadline changes.
- Use the deadline instead of the request generation to detect whether
or not a request timer fired after reinitialization of a request.
- Store the request state in the lowest two bits of the deadline instead
of the lowest two bits of 'gstate'.
- Remove all request member variables that became superfluous due to
this change: gstate, aborted_gstate, gstate_seq and aborted_gstate_sync.
- Remove the request state information that became superfluous due to this
patch, namely RQF_MQ_TIMEOUT_EXPIRED.
- Remove the code that became superfluous due to this change, namely
the RCU lock and unlock statements in blk_mq_complete_request() and
also the synchronize_rcu() call in the timeout handler.
This patch fixes the following kernel crash:
BUG: unable to handle kernel NULL pointer dereference at (null)
Oops: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 151 Comm: kworker/2:1H Tainted: G W 4.15.0-dbg+ #3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Workqueue: kblockd blk_mq_timeout_work
RIP: 0010:scsi_times_out+0x17/0x2c0 [scsi_mod]
Call Trace:
blk_mq_terminate_expired+0x42/0x80
bt_iter+0x3d/0x50
blk_mq_queue_tag_busy_iter+0xe9/0x200
blk_mq_timeout_work+0x181/0x2e0
process_one_work+0x21c/0x6d0
worker_thread+0x35/0x380
kthread+0x117/0x130
ret_from_fork+0x24/0x30
Fixes: 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with a RCU and generation based scheme")
Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: Israel Rukshin <israelr(a)mellanox.com>
Cc: Max Gurtovoy <maxg(a)mellanox.com>
Cc: <stable(a)vger.kernel.org> # v4.16
---
block/blk-core.c | 3 +-
block/blk-mq-debugfs.c | 1 -
block/blk-mq.c | 178 +++++++++++--------------------------------------
block/blk-mq.h | 25 ++-----
block/blk-timeout.c | 1 -
block/blk.h | 4 +-
include/linux/blkdev.h | 28 ++------
7 files changed, 53 insertions(+), 187 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 2623e609db4a..83c7a58e4fb3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -200,8 +200,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->start_time = jiffies;
set_start_time_ns(rq);
rq->part = NULL;
- seqcount_init(&rq->gstate_seq);
- u64_stats_init(&rq->aborted_gstate_sync);
+ spin_lock_init(&rq->state_lock);
}
EXPORT_SYMBOL(blk_rq_init);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 6f72413b6cab..80c7c585769f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -345,7 +345,6 @@ static const char *const rqf_name[] = {
RQF_NAME(STATS),
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
- RQF_NAME(MQ_TIMEOUT_EXPIRED),
RQF_NAME(MQ_POLL_SLEPT),
};
#undef RQF_NAME
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7816d28b7219..1da16d5e5cf1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -305,7 +305,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
rq->special = NULL;
/* tag was already set */
rq->extra_len = 0;
- rq->__deadline = 0;
INIT_LIST_HEAD(&rq->timeout_list);
rq->timeout = 0;
@@ -527,8 +526,7 @@ static void __blk_mq_complete_request(struct request *rq)
bool shared = false;
int cpu;
- WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
- blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+ WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_COMPLETE);
if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq);
@@ -577,34 +575,26 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
*srcu_idx = srcu_read_lock(hctx->srcu);
}
-static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
+/**
+ * blk_mq_change_rq_state - atomically test and set request state
+ * @rq: Request pointer.
+ * @old: Old request state.
+ * @new: New request state.
+ */
+static bool blk_mq_change_rq_state(struct request *rq, enum mq_rq_state old,
+ enum mq_rq_state new)
{
unsigned long flags;
+ bool changed_state = false;
- /*
- * blk_mq_rq_aborted_gstate() is used from the completion path and
- * can thus be called from irq context. u64_stats_fetch in the
- * middle of update on the same CPU leads to lockup. Disable irq
- * while updating.
- */
- local_irq_save(flags);
- u64_stats_update_begin(&rq->aborted_gstate_sync);
- rq->aborted_gstate = gstate;
- u64_stats_update_end(&rq->aborted_gstate_sync);
- local_irq_restore(flags);
-}
-
-static u64 blk_mq_rq_aborted_gstate(struct request *rq)
-{
- unsigned int start;
- u64 aborted_gstate;
-
- do {
- start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
- aborted_gstate = rq->aborted_gstate;
- } while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
+ spin_lock_irqsave(&rq->state_lock, flags);
+ if (blk_mq_rq_state(rq) == old) {
+ blk_mq_rq_update_state(rq, new);
+ changed_state = true;
+ }
+ spin_unlock_irqrestore(&rq->state_lock, flags);
- return aborted_gstate;
+ return changed_state;
}
/**
@@ -618,27 +608,12 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
void blk_mq_complete_request(struct request *rq)
{
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
- int srcu_idx;
if (unlikely(blk_should_fake_timeout(q)))
return;
- /*
- * If @rq->aborted_gstate equals the current instance, timeout is
- * claiming @rq and we lost. This is synchronized through
- * hctx_lock(). See blk_mq_timeout_work() for details.
- *
- * Completion path never blocks and we can directly use RCU here
- * instead of hctx_lock() which can be either RCU or SRCU.
- * However, that would complicate paths which want to synchronize
- * against us. Let stay in sync with the issue path so that
- * hctx_lock() covers both issue and completion paths.
- */
- hctx_lock(hctx, &srcu_idx);
- if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
+ if (blk_mq_change_rq_state(rq, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE))
__blk_mq_complete_request(rq);
- hctx_unlock(hctx, srcu_idx);
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -665,24 +640,14 @@ void blk_mq_start_request(struct request *rq)
WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
/*
- * Mark @rq in-flight which also advances the generation number,
- * and register for timeout. Protect with a seqcount to allow the
- * timeout path to read both @rq->gstate and @rq->deadline
- * coherently.
- *
- * This is the only place where a request is marked in-flight. If
- * the timeout path reads an in-flight @rq->gstate, the
- * @rq->deadline it reads together under @rq->gstate_seq is
- * guaranteed to be the matching one.
+ * Mark @rq in-flight and register for timeout. Because blk_add_timer()
+ * updates the deadline, if a timer set by a previous incarnation of
+ * this request fires this request will be skipped by the timeout code.
*/
- preempt_disable();
- write_seqcount_begin(&rq->gstate_seq);
-
+ spin_lock_irq(&rq->state_lock);
blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
blk_add_timer(rq);
-
- write_seqcount_end(&rq->gstate_seq);
- preempt_enable();
+ spin_unlock_irq(&rq->state_lock);
if (q->dma_drain_size && blk_rq_bytes(rq)) {
/*
@@ -695,11 +660,6 @@ void blk_mq_start_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_start_request);
-/*
- * When we reach here because queue is busy, it's safe to change the state
- * to IDLE without checking @rq->aborted_gstate because we should still be
- * holding the RCU read lock and thus protected against timeout.
- */
static void __blk_mq_requeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -811,15 +771,13 @@ EXPORT_SYMBOL(blk_mq_tag_to_rq);
struct blk_mq_timeout_data {
unsigned long next;
unsigned int next_set;
- unsigned int nr_expired;
};
static void blk_mq_rq_timed_out(struct request *req, bool reserved)
{
const struct blk_mq_ops *ops = req->q->mq_ops;
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
-
- req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
+ unsigned long flags;
if (ops->timeout)
ret = ops->timeout(req, reserved);
@@ -829,13 +787,10 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
__blk_mq_complete_request(req);
break;
case BLK_EH_RESET_TIMER:
- /*
- * As nothing prevents from completion happening while
- * ->aborted_gstate is set, this may lead to ignored
- * completions and further spurious timeouts.
- */
- blk_mq_rq_update_aborted_gstate(req, 0);
+ spin_lock_irqsave(&req->state_lock, flags);
blk_add_timer(req);
+ blk_mq_rq_update_state(req, MQ_RQ_IN_FLIGHT);
+ spin_unlock_irqrestore(&req->state_lock, flags);
break;
case BLK_EH_NOT_HANDLED:
break;
@@ -849,48 +804,23 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
struct blk_mq_timeout_data *data = priv;
- unsigned long gstate, deadline;
- int start;
-
- might_sleep();
-
- if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
- return;
-
- /* read coherent snapshots of @rq->state_gen and @rq->deadline */
- while (true) {
- start = read_seqcount_begin(&rq->gstate_seq);
- gstate = READ_ONCE(rq->gstate);
- deadline = blk_rq_deadline(rq);
- if (!read_seqcount_retry(&rq->gstate_seq, start))
- break;
- cond_resched();
- }
+ unsigned long deadline;
+ bool timed_out = false;
- /* if in-flight && overdue, mark for abortion */
- if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
+ spin_lock_irq(&rq->state_lock);
+ deadline = blk_rq_deadline(rq);
+ if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT &&
time_after_eq(jiffies, deadline)) {
- blk_mq_rq_update_aborted_gstate(rq, gstate);
- data->nr_expired++;
+ blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+ timed_out = true;
hctx->nr_expired++;
} else if (!data->next_set || time_after(data->next, deadline)) {
data->next = deadline;
data->next_set = 1;
}
-}
+ spin_unlock_irq(&rq->state_lock);
-static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
- struct request *rq, void *priv, bool reserved)
-{
- /*
- * We marked @rq->aborted_gstate and waited for RCU. If there were
- * completions that we lost to, they would have finished and
- * updated @rq->gstate by now; otherwise, the completion path is
- * now guaranteed to see @rq->aborted_gstate and yield. If
- * @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
- */
- if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
- READ_ONCE(rq->gstate) == rq->aborted_gstate)
+ if (timed_out)
blk_mq_rq_timed_out(rq, reserved);
}
@@ -898,11 +828,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
{
struct request_queue *q =
container_of(work, struct request_queue, timeout_work);
- struct blk_mq_timeout_data data = {
- .next = 0,
- .next_set = 0,
- .nr_expired = 0,
- };
+ struct blk_mq_timeout_data data = { };
struct blk_mq_hw_ctx *hctx;
int i;
@@ -925,33 +851,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
/* scan for the expired ones and set their ->aborted_gstate */
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
- if (data.nr_expired) {
- bool has_rcu = false;
-
- /*
- * Wait till everyone sees ->aborted_gstate. The
- * sequential waits for SRCUs aren't ideal. If this ever
- * becomes a problem, we can add per-hw_ctx rcu_head and
- * wait in parallel.
- */
- queue_for_each_hw_ctx(q, hctx, i) {
- if (!hctx->nr_expired)
- continue;
-
- if (!(hctx->flags & BLK_MQ_F_BLOCKING))
- has_rcu = true;
- else
- synchronize_srcu(hctx->srcu);
-
- hctx->nr_expired = 0;
- }
- if (has_rcu)
- synchronize_rcu();
-
- /* terminate the ones we won */
- blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
- }
-
if (data.next_set) {
data.next = blk_rq_timeout(round_jiffies_up(data.next));
mod_timer(&q->timeout, data.next);
@@ -2087,8 +1986,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
return ret;
}
- seqcount_init(&rq->gstate_seq);
- u64_stats_init(&rq->aborted_gstate_sync);
+ spin_lock_init(&rq->state_lock);
return 0;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 88c558f71819..d4d72f95d5a9 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,10 +27,7 @@ struct blk_mq_ctx {
struct kobject kobj;
} ____cacheline_aligned_in_smp;
-/*
- * Bits for request->gstate. The lower two bits carry MQ_RQ_* state value
- * and the upper bits the generation number.
- */
+/* Lowest two bits of request->__deadline. */
enum mq_rq_state {
MQ_RQ_IDLE = 0,
MQ_RQ_IN_FLIGHT = 1,
@@ -38,7 +35,6 @@ enum mq_rq_state {
MQ_RQ_STATE_BITS = 2,
MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1,
- MQ_RQ_GEN_INC = 1 << MQ_RQ_STATE_BITS,
};
void blk_mq_freeze_queue(struct request_queue *q);
@@ -104,9 +100,9 @@ void blk_mq_release(struct request_queue *q);
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
* @rq: target request.
*/
-static inline int blk_mq_rq_state(struct request *rq)
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
- return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
+ return rq->__deadline & MQ_RQ_STATE_MASK;
}
/**
@@ -115,22 +111,15 @@ static inline int blk_mq_rq_state(struct request *rq)
* @state: new state to set.
*
* Set @rq's state to @state. The caller is responsible for ensuring that
- * there are no other updaters. A request can transition into IN_FLIGHT
- * only from IDLE and doing so increments the generation number.
+ * there are no other updaters.
*/
static inline void blk_mq_rq_update_state(struct request *rq,
enum mq_rq_state state)
{
- u64 old_val = READ_ONCE(rq->gstate);
- u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
-
- if (state == MQ_RQ_IN_FLIGHT) {
- WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
- new_val += MQ_RQ_GEN_INC;
- }
+ unsigned long d = rq->__deadline;
- /* avoid exposing interim values */
- WRITE_ONCE(rq->gstate, new_val);
+ d &= ~(unsigned long)MQ_RQ_STATE_MASK;
+ rq->__deadline = d | state;
}
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 50a191720055..844a98edcf3f 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -217,7 +217,6 @@ void blk_add_timer(struct request *req)
req->timeout = q->rq_timeout;
blk_rq_set_deadline(req, jiffies + req->timeout);
- req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
/*
* Only the non-mq case needs to add the request to a protected list.
diff --git a/block/blk.h b/block/blk.h
index b034fd2460c4..07275598d262 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -314,12 +314,12 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
*/
static inline void blk_rq_set_deadline(struct request *rq, unsigned long time)
{
- rq->__deadline = time & ~0x1UL;
+ rq->__deadline = (time & ~0x3UL) | (rq->__deadline & 3UL);
}
static inline unsigned long blk_rq_deadline(struct request *rq)
{
- return rq->__deadline & ~0x1UL;
+ return rq->__deadline & ~0x3UL;
}
/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6075d1a6760c..e0a6a741afd0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,7 +27,6 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
-#include <linux/seqlock.h>
#include <linux/u64_stats_sync.h>
struct module;
@@ -125,8 +124,6 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
-/* timeout is expired */
-#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20))
/* already slept for hybrid poll */
#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21))
@@ -141,6 +138,7 @@ typedef __u32 __bitwise req_flags_t;
* especially blk_mq_rq_ctx_init() to take care of the added fields.
*/
struct request {
+ spinlock_t state_lock; /* protects __deadline for blk-mq */
struct request_queue *q;
struct blk_mq_ctx *mq_ctx;
@@ -226,27 +224,11 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
/*
- * On blk-mq, the lower bits of ->gstate (generation number and
- * state) carry the MQ_RQ_* state value and the upper bits the
- * generation number which is monotonically incremented and used to
- * distinguish the reuse instances.
- *
- * ->gstate_seq allows updates to ->gstate and other fields
- * (currently ->deadline) during request start to be read
- * atomically from the timeout path, so that it can operate on a
- * coherent set of information.
+ * access through blk_rq_set_deadline(), blk_rq_deadline() and
+ * blk_mark_rq_complete(), blk_clear_rq_complete() and
+ * blk_rq_is_complete() for legacy queues or blk_mq_rq_state() for
+ * blk-mq queues.
*/
- seqcount_t gstate_seq;
- u64 gstate;
-
- /*
- * ->aborted_gstate is used by the timeout to claim a specific
- * recycle instance of this request. See blk_mq_timeout_work().
- */
- struct u64_stats_sync aborted_gstate_sync;
- u64 aborted_gstate;
-
- /* access through blk_rq_set_deadline, blk_rq_deadline */
unsigned long __deadline;
struct list_head timeout_list;
--
2.16.2
This is a note to let you know that I've just added the patch titled
xfrm: fix state migration copy replay sequence numbers
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
xfrm-fix-state-migration-copy-replay-sequence-numbers.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Antony Antony <antony(a)phenome.org>
Date: Fri, 19 May 2017 12:47:00 +0200
Subject: xfrm: fix state migration copy replay sequence numbers
From: Antony Antony <antony(a)phenome.org>
[ Upstream commit a486cd23661c9387fb076c3f6ae8b2aa9d20d54a ]
During xfrm migration, copy the replay and preplay sequence numbers
from the previous state.
Here is a tcpdump output showing the problem.
10.0.10.46 is running a vanilla kernel and is the IKE/IPsec responder.
After the migration it sent the wrong sequence number, reset to 1.
The migration is from 10.0.0.52 to 10.0.0.53.
IP 10.0.0.52.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7cf), length 136
IP 10.0.10.46.4500 > 10.0.0.52.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x7cf), length 136
IP 10.0.0.52.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d0), length 136
IP 10.0.10.46.4500 > 10.0.0.52.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x7d0), length 136
IP 10.0.0.53.4500 > 10.0.10.46.4500: NONESP-encap: isakmp: child_sa inf2[I]
IP 10.0.10.46.4500 > 10.0.0.53.4500: NONESP-encap: isakmp: child_sa inf2[R]
IP 10.0.0.53.4500 > 10.0.10.46.4500: NONESP-encap: isakmp: child_sa inf2[I]
IP 10.0.10.46.4500 > 10.0.0.53.4500: NONESP-encap: isakmp: child_sa inf2[R]
IP 10.0.0.53.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d1), length 136
NOTE: next sequence is wrong 0x1
IP 10.0.10.46.4500 > 10.0.0.53.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x1), length 136
IP 10.0.0.53.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d2), length 136
IP 10.0.10.46.4500 > 10.0.0.53.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x2), length 136
Signed-off-by: Antony Antony <antony(a)phenome.org>
Reviewed-by: Richard Guy Briggs <rgb(a)tricolour.ca>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/xfrm/xfrm_state.c | 2 ++
1 file changed, 2 insertions(+)
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1246,6 +1246,8 @@ static struct xfrm_state *xfrm_state_clo
x->curlft.add_time = orig->curlft.add_time;
x->km.state = orig->km.state;
x->km.seq = orig->km.seq;
+ x->replay = orig->replay;
+ x->preplay = orig->preplay;
return x;
Patches currently in stable-queue which might be from antony(a)phenome.org are
queue-4.9/xfrm-fix-state-migration-copy-replay-sequence-numbers.patch
This is a note to let you know that I've just added the patch titled
xen: avoid type warning in xchg_xen_ulong
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
xen-avoid-type-warning-in-xchg_xen_ulong.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Thu, 8 Jun 2017 10:53:10 +0200
Subject: xen: avoid type warning in xchg_xen_ulong
From: Arnd Bergmann <arnd(a)arndb.de>
[ Upstream commit 9cc91f212111cdcbefa02dcdb7dd443f224bf52c ]
The improved type-checking version of container_of() triggers a warning for
xchg_xen_ulong, pointing out that 'xen_ulong_t' is unsigned, but atomic64_t
contains a signed value:
drivers/xen/events/events_2l.c: In function 'evtchn_2l_handle_events':
drivers/xen/events/events_2l.c:187:1020: error: call to '__compiletime_assert_187' declared with attribute error: pointer type mismatch in container_of()
This adds a cast to work around the warning.
Cc: Ian Abbott <abbotti(a)mev.co.uk>
Fixes: 85323a991d40 ("xen: arm: mandate EABI and use generic atomic operations.")
Fixes: daa2ac80834d ("kernel.h: handle pointers to arrays better in container_of()")
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Signed-off-by: Stefano Stabellini <sstabellini(a)kernel.org>
Reviewed-by: Stefano Stabellini <sstabellini(a)kernel.org>
Acked-by: Ian Abbott <abbotti(a)mev.co.uk>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/arm/include/asm/xen/events.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm/include/asm/xen/events.h
+++ b/arch/arm/include/asm/xen/events.h
@@ -16,7 +16,7 @@ static inline int xen_irqs_disabled(stru
return raw_irqs_disabled_flags(regs->ARM_cpsr);
}
-#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \
+#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((long long*)(ptr),\
atomic64_t, \
counter), (val))
Patches currently in stable-queue which might be from arnd(a)arndb.de are
queue-4.9/net-mlx5-avoid-build-warning-for-uniprocessor.patch
queue-4.9/xen-avoid-type-warning-in-xchg_xen_ulong.patch
This is a note to let you know that I've just added the patch titled
x86/mm/kaslr: Use the _ASM_MUL macro for multiplication to work around Clang incompatibility
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-kaslr-use-the-_asm_mul-macro-for-multiplication-to-work-around-clang-incompatibility.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Matthias Kaehlcke <mka(a)chromium.org>
Date: Mon, 1 May 2017 15:47:41 -0700
Subject: x86/mm/kaslr: Use the _ASM_MUL macro for multiplication to work around Clang incompatibility
From: Matthias Kaehlcke <mka(a)chromium.org>
[ Upstream commit 121843eb02a6e2fa30aefab64bfe183c97230c75 ]
The constraint "rm" allows the compiler to put mix_const into memory.
When the input operand is a memory location then MUL needs an operand
size suffix, since Clang can't infer the multiplication width from the
operand.
Add and use the _ASM_MUL macro which determines the operand size and
resolves to the MUL instruction with the corresponding suffix.
This fixes the following error when building with clang:
CC arch/x86/lib/kaslr.o
/tmp/kaslr-dfe1ad.s: Assembler messages:
/tmp/kaslr-dfe1ad.s:182: Error: no instruction mnemonic suffix given and no register operands; can't size instruction
Signed-off-by: Matthias Kaehlcke <mka(a)chromium.org>
Cc: Grant Grundler <grundler(a)chromium.org>
Cc: Greg Hackmann <ghackmann(a)google.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Michael Davidson <md(a)google.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Link: http://lkml.kernel.org/r/20170501224741.133938-1-mka@chromium.org
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/asm.h | 1 +
arch/x86/lib/kaslr.c | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -34,6 +34,7 @@
#define _ASM_ADD __ASM_SIZE(add)
#define _ASM_SUB __ASM_SIZE(sub)
#define _ASM_XADD __ASM_SIZE(xadd)
+#define _ASM_MUL __ASM_SIZE(mul)
#define _ASM_AX __ASM_REG(ax)
#define _ASM_BX __ASM_REG(bx)
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -5,6 +5,7 @@
* kernel starts. This file is included in the compressed kernel and
* normally linked in the regular.
*/
+#include <asm/asm.h>
#include <asm/kaslr.h>
#include <asm/msr.h>
#include <asm/archrandom.h>
@@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(cons
}
/* Circular multiply for better bit diffusion */
- asm("mul %3"
+ asm(_ASM_MUL "%3"
: "=a" (random), "=d" (raw)
: "a" (random), "rm" (mix_const));
random += raw;
Patches currently in stable-queue which might be from mka(a)chromium.org are
queue-4.9/x86-mm-kaslr-use-the-_asm_mul-macro-for-multiplication-to-work-around-clang-incompatibility.patch
This is a note to let you know that I've just added the patch titled
x86/tsc: Provide 'tsc=unstable' boot parameter
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-tsc-provide-tsc-unstable-boot-parameter.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Thu, 13 Apr 2017 14:56:44 +0200
Subject: x86/tsc: Provide 'tsc=unstable' boot parameter
From: Peter Zijlstra <peterz(a)infradead.org>
[ Upstream commit 8309f86cd41e8714526867177facf7a316d9be53 ]
Since the clocksource watchdog will only detect broken TSC after the
fact, all TSC based clocks will likely have observed non-continuous
values before/when switching away from TSC.
Therefore, the only way to fully avoid random clock movement when your
BIOS randomly mucks with TSC values from SMI handlers is to report the
TSC as unstable at boot.
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Mike Galbraith <efault(a)gmx.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: linux-kernel(a)vger.kernel.org
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/tsc.c | 2 ++
1 file changed, 2 insertions(+)
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -366,6 +366,8 @@ static int __init tsc_setup(char *str)
tsc_clocksource_reliable = 1;
if (!strncmp(str, "noirqtime", 9))
no_sched_irq_time = 1;
+ if (!strcmp(str, "unstable"))
+ mark_tsc_unstable("boot parameter");
return 1;
}
Patches currently in stable-queue which might be from peterz(a)infradead.org are
queue-4.9/perf-callchain-force-user_ds-when-invoking-perf_callchain_user.patch
queue-4.9/perf-report-fix-off-by-one-for-non-activation-frames.patch
queue-4.9/x86-efi-disable-runtime-services-on-kexec-kernel-if-booted-with-efi-old_map.patch
queue-4.9/perf-tools-fix-copyfile_offset-update-of-output-offset.patch
queue-4.9/sched-numa-use-down_read_trylock-for-the-mmap_sem.patch
queue-4.9/x86-asm-don-t-use-rbp-as-a-temporary-register-in-csum_partial_copy_generic.patch
queue-4.9/perf-core-correct-event-creation-with-perf_format_group.patch
queue-4.9/x86-mm-kaslr-use-the-_asm_mul-macro-for-multiplication-to-work-around-clang-incompatibility.patch
queue-4.9/x86-tsc-provide-tsc-unstable-boot-parameter.patch
queue-4.9/x86-boot-declare-error-as-noreturn.patch
queue-4.9/cpuhotplug-link-lock-stacks-for-hotplug-callbacks.patch
queue-4.9/perf-core-fix-error-handling-in-perf_event_alloc.patch
queue-4.9/sched-deadline-use-the-revised-wakeup-rule-for-suspending-constrained-dl-tasks.patch
This is a note to let you know that I've just added the patch titled
x86/efi: Disable runtime services on kexec kernel if booted with efi=old_map
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
The filename of the patch is:
x86-efi-disable-runtime-services-on-kexec-kernel-if-booted-with-efi-old_map.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Sai Praneeth <sai.praneeth.prakhya(a)intel.com>
Date: Fri, 26 May 2017 12:36:49 +0100
Subject: x86/efi: Disable runtime services on kexec kernel if booted with efi=old_map
From: Sai Praneeth <sai.praneeth.prakhya(a)intel.com>
[ Upstream commit 4e52797d2efefac3271abdc54439a3435abd77b9 ]
Booting kexec kernel with "efi=old_map" in kernel command line hits
kernel panic as shown below.
BUG: unable to handle kernel paging request at ffff88007fe78070
IP: virt_efi_set_variable.part.7+0x63/0x1b0
PGD 7ea28067
PUD 7ea2b067
PMD 7ea2d067
PTE 0
[...]
Call Trace:
virt_efi_set_variable()
efi_delete_dummy_variable()
efi_enter_virtual_mode()
start_kernel()
x86_64_start_reservations()
x86_64_start_kernel()
start_cpu()
[ efi=old_map was never intended to work with kexec. The problem with
using efi=old_map is that the virtual addresses are assigned from the
memory region used by other kernel mappings; vmalloc() space.
Potentially there could be collisions when booting kexec if something
else is mapped at the virtual address we allocated for runtime service
regions in the initial boot - Matt Fleming ]
Since kexec was never intended to work with efi=old_map, disable
runtime services in kexec if booted with efi=old_map, so that we don't
panic.
Tested-by: Lee Chun-Yi <jlee(a)suse.com>
Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya(a)intel.com>
Signed-off-by: Matt Fleming <matt(a)codeblueprint.co.uk>
Acked-by: Dave Young <dyoung(a)redhat.com>
Cc: Ard Biesheuvel <ard.biesheuvel(a)linaro.org>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ravi Shankar <ravi.v.shankar(a)intel.com>
Cc: Ricardo Neri <ricardo.neri(a)intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: linux-efi(a)vger.kernel.org
Link: http://lkml.kernel.org/r/20170526113652.21339-4-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/platform/efi/efi.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -832,9 +832,11 @@ static void __init kexec_enter_virtual_m
/*
* We don't do virtual mode, since we don't do runtime services, on
- * non-native EFI
+ * non-native EFI. With efi=old_map, we don't do runtime services in
+ * kexec kernel because in the initial boot something else might
+ * have been mapped at these virtual addresses.
*/
- if (!efi_is_native()) {
+ if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
efi_memmap_unmap();
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
Patches currently in stable-queue which might be from sai.praneeth.prakhya(a)intel.com are
queue-4.9/x86-efi-disable-runtime-services-on-kexec-kernel-if-booted-with-efi-old_map.patch
This is a note to let you know that I've just added the patch titled
x86/boot: Declare error() as noreturn
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
The filename of the patch is:
x86-boot-declare-error-as-noreturn.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Kees Cook <keescook(a)chromium.org>
Date: Fri, 5 May 2017 21:51:16 -0700
Subject: x86/boot: Declare error() as noreturn
From: Kees Cook <keescook(a)chromium.org>
[ Upstream commit 60854a12d281e2fa25662fa32ac8022bbff17432 ]
The compressed boot function error() is used to halt execution, but it
wasn't marked with "noreturn". This fixes that in preparation for
supporting kernel FORTIFY_SOURCE, which uses the noreturn annotation
on panic, and calls error(). GCC would warn about a noreturn function
calling a non-noreturn function:
arch/x86/boot/compressed/misc.c: In function ‘fortify_panic’:
arch/x86/boot/compressed/misc.c:416:1: warning: ‘noreturn’ function does return
}
^
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Cc: Daniel Micay <danielmicay(a)gmail.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Link: http://lkml.kernel.org/r/20170506045116.GA2879@beast
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/boot/compressed/error.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -1,7 +1,9 @@
#ifndef BOOT_COMPRESSED_ERROR_H
#define BOOT_COMPRESSED_ERROR_H
+#include <linux/compiler.h>
+
void warn(char *m);
-void error(char *m);
+void error(char *m) __noreturn;
#endif /* BOOT_COMPRESSED_ERROR_H */
Patches currently in stable-queue which might be from keescook(a)chromium.org are
queue-4.9/qlge-avoid-reading-past-end-of-buffer.patch
queue-4.9/pidns-disable-pid-allocation-if-pid_ns_prepare_proc-is-failed-in-alloc_pid.patch
queue-4.9/x86-mm-kaslr-use-the-_asm_mul-macro-for-multiplication-to-work-around-clang-incompatibility.patch
queue-4.9/ray_cs-avoid-reading-past-end-of-buffer.patch
queue-4.9/x86-boot-declare-error-as-noreturn.patch
queue-4.9/selftests-kselftest_harness-fix-compile-warning.patch
queue-4.9/bna-avoid-reading-past-end-of-buffer.patch
This is a note to let you know that I've just added the patch titled
X.509: Fix error code in x509_cert_parse()
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
The filename of the patch is:
x.509-fix-error-code-in-x509_cert_parse.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Date: Thu, 8 Jun 2017 14:47:49 +0100
Subject: X.509: Fix error code in x509_cert_parse()
From: Dan Carpenter <dan.carpenter(a)oracle.com>
[ Upstream commit 4e880168e9ffb1cdbdb72b3b48ab0324b30c2d62 ]
We forgot to set the error code on this path so it could result in
returning NULL which leads to a NULL dereference.
Fixes: db6c43bd2132 ("crypto: KEYS: convert public key and digsig asym to the akcipher api")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: David Howells <dhowells(a)redhat.com>
Signed-off-by: James Morris <james.l.morris(a)oracle.com>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
crypto/asymmetric_keys/x509_cert_parser.c | 1 +
1 file changed, 1 insertion(+)
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -102,6 +102,7 @@ struct x509_certificate *x509_cert_parse
}
}
+ ret = -ENOMEM;
cert->pub->key = kmemdup(ctx->key, ctx->key_size, GFP_KERNEL);
if (!cert->pub->key)
goto error_decode;
Patches currently in stable-queue which might be from dan.carpenter(a)oracle.com are
queue-4.9/block-fix-an-error-code-in-add_partition.patch
queue-4.9/x.509-fix-error-code-in-x509_cert_parse.patch
queue-4.9/rdma-iw_cxgb4-avoid-touch-after-free-error-in-arp-failure-handlers.patch
queue-4.9/drm-amdkfd-null-dereference-involving-create_process.patch
queue-4.9/pnfs-flexfiles-missing-error-code-in-ff_layout_alloc_lseg.patch
queue-4.9/drivers-misc-vmw_vmci-vmci_queue_pair.c-fix-a-couple-integer-overflow-tests.patch
queue-4.9/cxl-unlock-on-error-in-probe.patch
queue-4.9/md-cluster-fix-potential-lock-issue-in-add_new_disk.patch
queue-4.9/ipmi_ssif-unlock-on-allocation-failure.patch
queue-4.9/powercap-fix-an-error-code-in-powercap_register_zone.patch
queue-4.9/perf-core-fix-error-handling-in-perf_event_alloc.patch
queue-4.9/libceph-null-deref-on-crush_decode-error-path.patch
This is a note to let you know that I've just added the patch titled
x86/asm: Don't use RBP as a temporary register in csum_partial_copy_generic()
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
The filename of the patch is:
x86-asm-don-t-use-rbp-as-a-temporary-register-in-csum_partial_copy_generic.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Apr 9 17:09:24 CEST 2018
From: Josh Poimboeuf <jpoimboe(a)redhat.com>
Date: Thu, 4 May 2017 09:51:40 -0500
Subject: x86/asm: Don't use RBP as a temporary register in csum_partial_copy_generic()
From: Josh Poimboeuf <jpoimboe(a)redhat.com>
[ Upstream commit 42fc6c6cb1662ba2fa727dd01c9473c63be4e3b6 ]
Andrey Konovalov reported the following warning while fuzzing the kernel
with syzkaller:
WARNING: kernel stack regs at ffff8800686869f8 in a.out:4933 has bad 'bp' value c3fc855a10167ec0
The unwinder dump revealed that RBP had a bad value when an interrupt
occurred in csum_partial_copy_generic().
That function saves RBP on the stack and then overwrites it, using it as
a scratch register. That's problematic because it breaks stack traces
if an interrupt occurs in the middle of the function.
Replace the usage of RBP with another callee-saved register (R15) so
stack traces are no longer affected.
Reported-by: Andrey Konovalov <andreyknvl(a)google.com>
Tested-by: Andrey Konovalov <andreyknvl(a)google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Cong Wang <xiyou.wangcong(a)gmail.com>
Cc: David S . Miller <davem(a)davemloft.net>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: Kostya Serebryany <kcc(a)google.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Marcelo Ricardo Leitner <marcelo.leitner(a)gmail.com>
Cc: Neil Horman <nhorman(a)tuxdriver.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Vlad Yasevich <vyasevich(a)gmail.com>
Cc: linux-sctp(a)vger.kernel.org
Cc: netdev <netdev(a)vger.kernel.org>
Cc: syzkaller <syzkaller(a)googlegroups.com>
Link: http://lkml.kernel.org/r/4b03a961efda5ec9bfe46b7b9c9ad72d1efad343.149390948…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/lib/csum-copy_64.S | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic)
movq %r12, 3*8(%rsp)
movq %r14, 4*8(%rsp)
movq %r13, 5*8(%rsp)
- movq %rbp, 6*8(%rsp)
+ movq %r15, 6*8(%rsp)
movq %r8, (%rsp)
movq %r9, 1*8(%rsp)
@@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic)
/* main loop. clear in 64 byte blocks */
/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
/* r11: temp3, rdx: temp4, r12 loopcnt */
- /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
+ /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
.p2align 4
.Lloop:
source
@@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic)
source
movq 32(%rdi), %r10
source
- movq 40(%rdi), %rbp
+ movq 40(%rdi), %r15
source
movq 48(%rdi), %r14
source
@@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic)
adcq %r11, %rax
adcq %rdx, %rax
adcq %r10, %rax
- adcq %rbp, %rax
+ adcq %r15, %rax
adcq %r14, %rax
adcq %r13, %rax
@@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic)
dest
movq %r10, 32(%rsi)
dest
- movq %rbp, 40(%rsi)
+ movq %r15, 40(%rsi)
dest
movq %r14, 48(%rsi)
dest
@@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic)
movq 3*8(%rsp), %r12
movq 4*8(%rsp), %r14
movq 5*8(%rsp), %r13
- movq 6*8(%rsp), %rbp
+ movq 6*8(%rsp), %r15
addq $7*8, %rsp
ret
Patches currently in stable-queue which might be from jpoimboe(a)redhat.com are
queue-4.9/x86-asm-don-t-use-rbp-as-a-temporary-register-in-csum_partial_copy_generic.patch