From: Peter Zijlstra peterz@infradead.org
[ Upstream commit 40ea97290b08be2e038b31cbb33097d1145e8169 ]
New tooling noticed this mishap:
kernel/kcov.o: warning: objtool: write_comp_data()+0x138: call to __stack_chk_fail() with UACCESS enabled kernel/kcov.o: warning: objtool: __sanitizer_cov_trace_pc()+0xd9: call to __stack_chk_fail() with UACCESS enabled
All the other instrumentation (KASAN,UBSAN) also have stack protector disabled.
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Borislav Petkov bp@alien8.de Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/Makefile | 1 + 1 file changed, 1 insertion(+)
diff --git a/kernel/Makefile b/kernel/Makefile index 172d151d429c..3085141c055c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -30,6 +30,7 @@ KCOV_INSTRUMENT_extable.o := n # Don't self-instrument. KCOV_INSTRUMENT_kcov.o := n KASAN_SANITIZE_kcov.o := n +CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
# cond_syscall is currently not LTO compatible CFLAGS_sys_ni.o = $(DISABLE_LTO)
From: Takashi Iwai tiwai@suse.de
[ Upstream commit feb689025fbb6f0aa6297d3ddf97de945ea4ad32 ]
ALSA OSS sequencer calls the ioctl function indirectly via snd_seq_kernel_client_ctl(). While we already applied the protection against races between the normal ioctls and writes via the client's ioctl_mutex, this code path was left untouched. And this seems to be the cause of still remaining some rare UAF as spontaneously triggered by syzkaller.
For the sake of robustness, wrap the ioctl_mutex also for the call via snd_seq_kernel_client_ctl(), too.
Reported-by: syzbot+e4c8abb920efa77bace9@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/core/seq/seq_clientmgr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 3bcd7a2f0394..692631bd4a35 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -2348,14 +2348,19 @@ int snd_seq_kernel_client_ctl(int clientid, unsigned int cmd, void *arg) { const struct ioctl_handler *handler; struct snd_seq_client *client; + int err;
client = clientptr(clientid); if (client == NULL) return -ENXIO;
for (handler = ioctl_handlers; handler->cmd > 0; ++handler) { - if (handler->cmd == cmd) - return handler->func(client, arg); + if (handler->cmd == cmd) { + mutex_lock(&client->ioctl_mutex); + err = handler->func(client, arg); + mutex_unlock(&client->ioctl_mutex); + return err; + } }
pr_debug("ALSA: seq unknown ioctl() 0x%x (type='%c', number=0x%02x)\n",
From: Takashi Iwai tiwai@suse.de
[ Upstream commit 2eabc5ec8ab4d4748a82050dfcb994119b983750 ]
The snd_seq_ioctl_get_subscription() retrieves the port subscriber information as a pointer, while the object isn't protected, hence it may be deleted before the actual reference. This race was spotted by syzkaller and may lead to a UAF.
The fix is simply copying the data in the lookup function that performs in the rwsem to protect against the deletion.
Reported-by: syzbot+9437020c82413d00222d@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/core/seq/seq_clientmgr.c | 10 ++-------- sound/core/seq/seq_ports.c | 13 ++++++++----- sound/core/seq/seq_ports.h | 5 +++-- 3 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 692631bd4a35..068880ac47b5 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1904,20 +1904,14 @@ static int snd_seq_ioctl_get_subscription(struct snd_seq_client *client, int result; struct snd_seq_client *sender = NULL; struct snd_seq_client_port *sport = NULL; - struct snd_seq_subscribers *p;
result = -EINVAL; if ((sender = snd_seq_client_use_ptr(subs->sender.client)) == NULL) goto __end; if ((sport = snd_seq_port_use_ptr(sender, subs->sender.port)) == NULL) goto __end; - p = snd_seq_port_get_subscription(&sport->c_src, &subs->dest); - if (p) { - result = 0; - *subs = p->info; - } else - result = -ENOENT; - + result = snd_seq_port_get_subscription(&sport->c_src, &subs->dest, + subs); __end: if (sport) snd_seq_port_unlock(sport); diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index d21ece9f8d73..edcb7ab21bf5 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -635,20 +635,23 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
/* get matched subscriber */ -struct snd_seq_subscribers *snd_seq_port_get_subscription(struct snd_seq_port_subs_info *src_grp, - struct snd_seq_addr *dest_addr) +int snd_seq_port_get_subscription(struct snd_seq_port_subs_info *src_grp, + struct snd_seq_addr *dest_addr, + struct snd_seq_port_subscribe *subs) { - struct snd_seq_subscribers *s, *found = NULL; + struct snd_seq_subscribers *s; + int err = -ENOENT;
down_read(&src_grp->list_mutex); list_for_each_entry(s, &src_grp->list_head, src_list) { if (addr_match(dest_addr, &s->info.dest)) { - found = s; + *subs = s->info; + err = 0; break; } } up_read(&src_grp->list_mutex); - return found; + return err; }
/* diff --git a/sound/core/seq/seq_ports.h b/sound/core/seq/seq_ports.h index 26bd71f36c41..06003b36652e 100644 --- a/sound/core/seq/seq_ports.h +++ b/sound/core/seq/seq_ports.h @@ -135,7 +135,8 @@ int snd_seq_port_subscribe(struct snd_seq_client_port *port, struct snd_seq_port_subscribe *info);
/* get matched subscriber */ -struct snd_seq_subscribers *snd_seq_port_get_subscription(struct snd_seq_port_subs_info *src_grp, - struct snd_seq_addr *dest_addr); +int snd_seq_port_get_subscription(struct snd_seq_port_subs_info *src_grp, + struct snd_seq_addr *dest_addr, + struct snd_seq_port_subscribe *subs);
#endif
From: Takashi Iwai tiwai@suse.de
[ Upstream commit f0654ba94e33699b295ce4f3dc73094db6209035 ]
This reverts commit feb689025fbb6f0aa6297d3ddf97de945ea4ad32.
The fix attempt was incorrect, leading to the mutex deadlock through the close of OSS sequencer client. The proper fix needs more consideration, so let's revert it now.
Fixes: feb689025fbb ("ALSA: seq: Protect in-kernel ioctl calls with mutex") Reported-by: syzbot+47ded6c0f23016cde310@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/core/seq/seq_clientmgr.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 068880ac47b5..7d2c07c1237c 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -2342,19 +2342,14 @@ int snd_seq_kernel_client_ctl(int clientid, unsigned int cmd, void *arg) { const struct ioctl_handler *handler; struct snd_seq_client *client; - int err;
client = clientptr(clientid); if (client == NULL) return -ENXIO;
for (handler = ioctl_handlers; handler->cmd > 0; ++handler) { - if (handler->cmd == cmd) { - mutex_lock(&client->ioctl_mutex); - err = handler->func(client, arg); - mutex_unlock(&client->ioctl_mutex); - return err; - } + if (handler->cmd == cmd) + return handler->func(client, arg); }
pr_debug("ALSA: seq unknown ioctl() 0x%x (type='%c', number=0x%02x)\n",
From: Takashi Iwai tiwai@suse.de
[ Upstream commit 7c32ae35fbf9cffb7aa3736f44dec10c944ca18e ]
The call of unsubscribe_port() which manages the group count and module refcount from delete_and_unsubscribe_port() looks racy; it's not covered by the group list lock, and it's likely a cause of the reported unbalance at port deletion. Let's move the call inside the group list_mutex to plug the hole.
Reported-by: syzbot+e4c8abb920efa77bace9@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/core/seq/seq_ports.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index edcb7ab21bf5..c8fa4336bccd 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -550,10 +550,10 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, list_del_init(list); grp->exclusive = 0; write_unlock_irq(&grp->list_lock); - up_write(&grp->list_mutex);
if (!empty) unsubscribe_port(client, port, grp, &subs->info, ack); + up_write(&grp->list_mutex); }
/* connect two ports */
From: Vasily Gorbik gor@linux.ibm.com
[ Upstream commit 01eb42afb45719cb41bb32c278e068073738899d ]
arch/s390/lib/uaccess.c is built without kasan instrumentation. Kasan checks are performed explicitly in copy_from_user/copy_to_user functions. But since those functions could be inlined, calls from files like uaccess.c with instrumentation disabled won't generate kasan reports. This is currently the case with strncpy_from_user function which was revealed by newly added kasan test. Avoid inlining of copy_from_user/copy_to_user when the kernel is built with kasan support to make sure kasan checks are fully functional.
Signed-off-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/include/asm/uaccess.h | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index cdd0f0d999e2..689eae8d3859 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -67,8 +67,10 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n); unsigned long __must_check raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+#ifndef CONFIG_KASAN #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER +#endif
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
From: Venkata Narendra Kumar Gutta vnkgutta@codeaurora.org
[ Upstream commit edb16da34b084c66763f29bee42b4e6bb33c3d66 ]
Platform core is using pdev->name as the platform device name to do the binding of the devices with the drivers. But, when the platform driver overrides the platform device name with dev_set_name(), the pdev->name is pointing to a location which is freed and becomes an invalid parameter to do the binding match.
use-after-free instance:
[ 33.325013] BUG: KASAN: use-after-free in strcmp+0x8c/0xb0 [ 33.330646] Read of size 1 at addr ffffffc10beae600 by task modprobe [ 33.339068] CPU: 5 PID: 518 Comm: modprobe Tainted: G S W O 4.19.30+ #3 [ 33.346835] Hardware name: MTP (DT) [ 33.350419] Call trace: [ 33.352941] dump_backtrace+0x0/0x3b8 [ 33.356713] show_stack+0x24/0x30 [ 33.360119] dump_stack+0x160/0x1d8 [ 33.363709] print_address_description+0x84/0x2e0 [ 33.368549] kasan_report+0x26c/0x2d0 [ 33.372322] __asan_report_load1_noabort+0x2c/0x38 [ 33.377248] strcmp+0x8c/0xb0 [ 33.380306] platform_match+0x70/0x1f8 [ 33.384168] __driver_attach+0x78/0x3a0 [ 33.388111] bus_for_each_dev+0x13c/0x1b8 [ 33.392237] driver_attach+0x4c/0x58 [ 33.395910] bus_add_driver+0x350/0x560 [ 33.399854] driver_register+0x23c/0x328 [ 33.403886] __platform_driver_register+0xd0/0xe0
So, use dev_name(&pdev->dev), which fetches the platform device name from the kobject(dev->kobj->name) of the device instead of the pdev->name.
Signed-off-by: Venkata Narendra Kumar Gutta vnkgutta@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9045c5f3734e..080038bbed39 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -855,7 +855,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, if (len != -ENODEV) return len;
- len = snprintf(buf, PAGE_SIZE, "platform:%s\n", pdev->name); + len = snprintf(buf, PAGE_SIZE, "platform:%s\n", dev_name(&pdev->dev));
return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; } @@ -931,7 +931,7 @@ static int platform_uevent(struct device *dev, struct kobj_uevent_env *env) return rc;
add_uevent_var(env, "MODALIAS=%s%s", PLATFORM_MODULE_PREFIX, - pdev->name); + dev_name(&pdev->dev)); return 0; }
@@ -940,7 +940,7 @@ static const struct platform_device_id *platform_match_id( struct platform_device *pdev) { while (id->name[0]) { - if (strcmp(pdev->name, id->name) == 0) { + if (strcmp(dev_name(&pdev->dev), id->name) == 0) { pdev->id_entry = id; return id; } @@ -984,7 +984,7 @@ static int platform_match(struct device *dev, struct device_driver *drv) return platform_match_id(pdrv->id_table, pdev) != NULL;
/* fall-back to driver name match */ - return (strcmp(pdev->name, drv->name) == 0); + return (strcmp(dev_name(&pdev->dev), drv->name) == 0); }
#ifdef CONFIG_PM_SLEEP
From: Young Xiao YangX92@hotmail.com
[ Upstream commit b281218ad4311a0342a40cb02fb17a363df08b48 ]
There is an out-of-bounds access to "config[len - 1]" array when the variable "len" is zero.
See commit dada6a43b040 ("kgdboc: fix KASAN global-out-of-bounds bug in param_set_kgdboc_var()") for details.
Signed-off-by: Young Xiao YangX92@hotmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/misc/kgdbts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index fc7efedbc4be..94cbc5c98cae 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -1134,7 +1134,7 @@ static void kgdbts_put_char(u8 chr)
static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp) { - int len = strlen(kmessage); + size_t len = strlen(kmessage);
if (len >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdbts: config string too long\n"); @@ -1154,7 +1154,7 @@ static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp)
strcpy(config, kmessage); /* Chop out \n char as a result of echo */ - if (config[len - 1] == '\n') + if (len && config[len - 1] == '\n') config[len - 1] = '\0';
/* Go and configure with the new params. */
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit 35399f87e271f7cf3048eab00a421a6519ac8441 ]
In configfs_register_group(), if create_default_group() failed, we forget to unlink the group. It will left a invalid item in the parent list, which may trigger the use-after-free issue seen below:
BUG: KASAN: use-after-free in __list_add_valid+0xd4/0xe0 lib/list_debug.c:26 Read of size 8 at addr ffff8881ef61ae20 by task syz-executor.0/5996
CPU: 1 PID: 5996 Comm: syz-executor.0 Tainted: G C 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xa9/0x10e lib/dump_stack.c:113 print_address_description+0x65/0x270 mm/kasan/report.c:187 kasan_report+0x149/0x18d mm/kasan/report.c:317 __list_add_valid+0xd4/0xe0 lib/list_debug.c:26 __list_add include/linux/list.h:60 [inline] list_add_tail include/linux/list.h:93 [inline] link_obj+0xb0/0x190 fs/configfs/dir.c:759 link_group+0x1c/0x130 fs/configfs/dir.c:784 configfs_register_group+0x56/0x1e0 fs/configfs/dir.c:1751 configfs_register_default_group+0x72/0xc0 fs/configfs/dir.c:1834 ? 0xffffffffc1be0000 iio_sw_trigger_init+0x23/0x1000 [industrialio_sw_trigger] do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f494ecbcc58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007f494ecbcc70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f494ecbd6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004
Allocated by task 5987: set_track mm/kasan/common.c:87 [inline] __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:497 kmalloc include/linux/slab.h:545 [inline] kzalloc include/linux/slab.h:740 [inline] configfs_register_default_group+0x4c/0xc0 fs/configfs/dir.c:1829 0xffffffffc1bd0023 do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Freed by task 5987: set_track mm/kasan/common.c:87 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:459 slab_free_hook mm/slub.c:1429 [inline] slab_free_freelist_hook mm/slub.c:1456 [inline] slab_free mm/slub.c:3003 [inline] kfree+0xe1/0x270 mm/slub.c:3955 configfs_register_default_group+0x9a/0xc0 fs/configfs/dir.c:1836 0xffffffffc1bd0023 do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe
The buggy address belongs to the object at ffff8881ef61ae00 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 32 bytes inside of 192-byte region [ffff8881ef61ae00, ffff8881ef61aec0) The buggy address belongs to the page: page:ffffea0007bd8680 count:1 mapcount:0 mapping:ffff8881f6c03000 index:0xffff8881ef61a700 flags: 0x2fffc0000000200(slab) raw: 02fffc0000000200 ffffea0007ca4740 0000000500000005 ffff8881f6c03000 raw: ffff8881ef61a700 000000008010000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected
Memory state around the buggy address: ffff8881ef61ad00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8881ef61ad80: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc
ffff8881ef61ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^ ffff8881ef61ae80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8881ef61af00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Fixes: 5cf6a51e6062 ("configfs: allow dynamic group creation") Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: YueHaibing yuehaibing@huawei.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- fs/configfs/dir.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index d2a1a79fa324..d7955dc56737 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1755,12 +1755,19 @@ int configfs_register_group(struct config_group *parent_group,
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); ret = create_default_group(parent_group, group); - if (!ret) { - spin_lock(&configfs_dirent_lock); - configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); - spin_unlock(&configfs_dirent_lock); - } + if (ret) + goto err_out; + + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); + inode_unlock(d_inode(parent)); + return 0; +err_out: inode_unlock(d_inode(parent)); + mutex_lock(&subsys->su_mutex); + unlink_group(group); + mutex_unlock(&subsys->su_mutex); return ret; } EXPORT_SYMBOL(configfs_register_group);
From: Li Rongqing lirongqing@baidu.com
[ Upstream commit d6a2946a88f524a47cc9b79279667137899db807 ]
msgctl10 of ltp triggers the following lockup When CONFIG_KASAN is enabled on large memory SMP systems, the pages initialization can take a long time, if msgctl10 requests a huge block memory, and it will block rcu scheduler, so release cpu actively.
After adding schedule() in free_msg, free_msg can not be called when holding spinlock, so adding msg to a tmp list, and free it out of spinlock
rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: rcu: Tasks blocked on level-1 rcu_node (CPUs 16-31): P32505 rcu: Tasks blocked on level-1 rcu_node (CPUs 48-63): P34978 rcu: (detected by 11, t=35024 jiffies, g=44237529, q=16542267) msgctl10 R running task 21608 32505 2794 0x00000082 Call Trace: preempt_schedule_irq+0x4c/0xb0 retint_kernel+0x1b/0x2d RIP: 0010:__is_insn_slot_addr+0xfb/0x250 Code: 82 1d 00 48 8b 9b 90 00 00 00 4c 89 f7 49 c1 ee 03 e8 59 83 1d 00 48 b8 00 00 00 00 00 fc ff df 4c 39 eb 48 89 9d 58 ff ff ff <41> c6 04 06 f8 74 66 4c 8d 75 98 4c 89 f1 48 c1 e9 03 48 01 c8 48 RSP: 0018:ffff88bce041f758 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: dffffc0000000000 RBX: ffffffff8471bc50 RCX: ffffffff828a2a57 RDX: dffffc0000000000 RSI: dffffc0000000000 RDI: ffff88bce041f780 RBP: ffff88bce041f828 R08: ffffed15f3f4c5b3 R09: ffffed15f3f4c5b3 R10: 0000000000000001 R11: ffffed15f3f4c5b2 R12: 000000318aee9b73 R13: ffffffff8471bc50 R14: 1ffff1179c083ef0 R15: 1ffff1179c083eec kernel_text_address+0xc1/0x100 __kernel_text_address+0xe/0x30 unwind_get_return_address+0x2f/0x50 __save_stack_trace+0x92/0x100 create_object+0x380/0x650 __kmalloc+0x14c/0x2b0 load_msg+0x38/0x1a0 do_msgsnd+0x19e/0xcf0 do_syscall_64+0x117/0x400 entry_SYSCALL_64_after_hwframe+0x49/0xbe
rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: rcu: Tasks blocked on level-1 rcu_node (CPUs 0-15): P32170 rcu: (detected by 14, t=35016 jiffies, g=44237525, q=12423063) msgctl10 R running task 21608 32170 32155 0x00000082 Call Trace: preempt_schedule_irq+0x4c/0xb0 retint_kernel+0x1b/0x2d RIP: 0010:lock_acquire+0x4d/0x340 Code: 48 81 ec c0 00 00 00 45 89 c6 4d 89 cf 48 8d 6c 24 20 48 89 3c 24 48 8d bb e4 0c 00 00 89 74 24 0c 48 c7 44 24 20 b3 8a b5 41 <48> c1 ed 03 48 c7 44 24 28 b4 25 18 84 48 c7 44 24 30 d0 54 7a 82 RSP: 0018:ffff88af83417738 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 RAX: dffffc0000000000 RBX: ffff88bd335f3080 RCX: 0000000000000002 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88bd335f3d64 RBP: ffff88af83417758 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: ffffed13f3f745b2 R12: 0000000000000000 R13: 0000000000000002 R14: 0000000000000000 R15: 0000000000000000 is_bpf_text_address+0x32/0xe0 kernel_text_address+0xec/0x100 __kernel_text_address+0xe/0x30 unwind_get_return_address+0x2f/0x50 __save_stack_trace+0x92/0x100 save_stack+0x32/0xb0 __kasan_slab_free+0x130/0x180 kfree+0xfa/0x2d0 free_msg+0x24/0x50 do_msgrcv+0x508/0xe60 do_syscall_64+0x117/0x400 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Davidlohr said: "So after releasing the lock, the msg rbtree/list is empty and new calls will not see those in the newly populated tmp_msg list, and therefore they cannot access the delayed msg freeing pointers, which is good. Also the fact that the node_cache is now freed before the actual messages seems to be harmless as this is wanted for msg_insert() avoiding GFP_ATOMIC allocations, and after releasing the info->lock the thing is freed anyway so it should not change things"
Link: http://lkml.kernel.org/r/1552029161-4957-1-git-send-email-lirongqing@baidu.c... Signed-off-by: Li RongQing lirongqing@baidu.com Signed-off-by: Zhang Yu zhangyu31@baidu.com Reviewed-by: Davidlohr Bueso dbueso@suse.de Cc: Manfred Spraul manfred@colorfullife.com Cc: Arnd Bergmann arnd@arndb.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- ipc/mqueue.c | 10 ++++++++-- ipc/msgutil.c | 6 ++++++ 2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index d24025626310..5c0ae912f2f2 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -374,7 +374,8 @@ static void mqueue_evict_inode(struct inode *inode) struct user_struct *user; unsigned long mq_bytes, mq_treesize; struct ipc_namespace *ipc_ns; - struct msg_msg *msg; + struct msg_msg *msg, *nmsg; + LIST_HEAD(tmp_msg);
clear_inode(inode);
@@ -385,10 +386,15 @@ static void mqueue_evict_inode(struct inode *inode) info = MQUEUE_I(inode); spin_lock(&info->lock); while ((msg = msg_get(info)) != NULL) - free_msg(msg); + list_add_tail(&msg->m_list, &tmp_msg); kfree(info->node_cache); spin_unlock(&info->lock);
+ list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) { + list_del(&msg->m_list); + free_msg(msg); + } + /* Total amount of bytes accounted for the mqueue */ mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 84598025a6ad..e65593742e2b 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -18,6 +18,7 @@ #include <linux/utsname.h> #include <linux/proc_ns.h> #include <linux/uaccess.h> +#include <linux/sched.h>
#include "util.h"
@@ -64,6 +65,9 @@ static struct msg_msg *alloc_msg(size_t len) pseg = &msg->next; while (len > 0) { struct msg_msgseg *seg; + + cond_resched(); + alen = min(len, DATALEN_SEG); seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT); if (seg == NULL) @@ -176,6 +180,8 @@ void free_msg(struct msg_msg *msg) kfree(msg); while (seg != NULL) { struct msg_msgseg *tmp = seg->next; + + cond_resched(); kfree(seg); seg = tmp; }
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit c09581a52765a85f19fc35340127396d5e3379cc ]
KASAN reports this:
BUG: KASAN: global-out-of-bounds in qedi_dbg_err+0xda/0x330 [qedi] Read of size 31 at addr ffffffffc12b0ae0 by task syz-executor.0/2429
CPU: 0 PID: 2429 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xfa/0x1ce lib/dump_stack.c:113 print_address_description+0x1c4/0x270 mm/kasan/report.c:187 kasan_report+0x149/0x18d mm/kasan/report.c:317 memcpy+0x1f/0x50 mm/kasan/common.c:130 qedi_dbg_err+0xda/0x330 [qedi] ? 0xffffffffc12d0000 qedi_init+0x118/0x1000 [qedi] ? 0xffffffffc12d0000 ? 0xffffffffc12d0000 ? 0xffffffffc12d0000 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f2d57e55c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bfa0 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 00000000200003c0 RDI: 0000000000000003 RBP: 00007f2d57e55c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f2d57e566bc R13: 00000000004bcefb R14: 00000000006f7030 R15: 0000000000000004
The buggy address belongs to the variable: __func__.67584+0x0/0xffffffffffffd520 [qedi]
Memory state around the buggy address: ffffffffc12b0980: fa fa fa fa 00 04 fa fa fa fa fa fa 00 00 05 fa ffffffffc12b0a00: fa fa fa fa 00 00 04 fa fa fa fa fa 00 05 fa fa
ffffffffc12b0a80: fa fa fa fa 00 06 fa fa fa fa fa fa 00 02 fa fa
^ ffffffffc12b0b00: fa fa fa fa 00 00 04 fa fa fa fa fa 00 00 03 fa ffffffffc12b0b80: fa fa fa fa 00 00 02 fa fa fa fa fa 00 00 04 fa
Currently the qedi_dbg_* family of functions can overrun the end of the source string if it is less than the destination buffer length because of the use of a fixed sized memcpy. Remove the memset/memcpy calls to nfunc and just use func instead as it is always a null terminated string.
Reported-by: Hulk Robot hulkci@huawei.com Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") Signed-off-by: YueHaibing yuehaibing@huawei.com Reviewed-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qedi/qedi_dbg.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-)
diff --git a/drivers/scsi/qedi/qedi_dbg.c b/drivers/scsi/qedi/qedi_dbg.c index 8fd28b056f73..3383314a3882 100644 --- a/drivers/scsi/qedi/qedi_dbg.c +++ b/drivers/scsi/qedi/qedi_dbg.c @@ -16,10 +16,6 @@ qedi_dbg_err(struct qedi_dbg_ctx *qedi, const char *func, u32 line, { va_list va; struct va_format vaf; - char nfunc[32]; - - memset(nfunc, 0, sizeof(nfunc)); - memcpy(nfunc, func, sizeof(nfunc) - 1);
va_start(va, fmt);
@@ -28,9 +24,9 @@ qedi_dbg_err(struct qedi_dbg_ctx *qedi, const char *func, u32 line,
if (likely(qedi) && likely(qedi->pdev)) pr_err("[%s]:[%s:%d]:%d: %pV", dev_name(&qedi->pdev->dev), - nfunc, line, qedi->host_no, &vaf); + func, line, qedi->host_no, &vaf); else - pr_err("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + pr_err("[0000:00:00.0]:[%s:%d]: %pV", func, line, &vaf);
va_end(va); } @@ -41,10 +37,6 @@ qedi_dbg_warn(struct qedi_dbg_ctx *qedi, const char *func, u32 line, { va_list va; struct va_format vaf; - char nfunc[32]; - - memset(nfunc, 0, sizeof(nfunc)); - memcpy(nfunc, func, sizeof(nfunc) - 1);
va_start(va, fmt);
@@ -56,9 +48,9 @@ qedi_dbg_warn(struct qedi_dbg_ctx *qedi, const char *func, u32 line,
if (likely(qedi) && likely(qedi->pdev)) pr_warn("[%s]:[%s:%d]:%d: %pV", dev_name(&qedi->pdev->dev), - nfunc, line, qedi->host_no, &vaf); + func, line, qedi->host_no, &vaf); else - pr_warn("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + pr_warn("[0000:00:00.0]:[%s:%d]: %pV", func, line, &vaf);
ret: va_end(va); @@ -70,10 +62,6 @@ qedi_dbg_notice(struct qedi_dbg_ctx *qedi, const char *func, u32 line, { va_list va; struct va_format vaf; - char nfunc[32]; - - memset(nfunc, 0, sizeof(nfunc)); - memcpy(nfunc, func, sizeof(nfunc) - 1);
va_start(va, fmt);
@@ -85,10 +73,10 @@ qedi_dbg_notice(struct qedi_dbg_ctx *qedi, const char *func, u32 line,
if (likely(qedi) && likely(qedi->pdev)) pr_notice("[%s]:[%s:%d]:%d: %pV", - dev_name(&qedi->pdev->dev), nfunc, line, + dev_name(&qedi->pdev->dev), func, line, qedi->host_no, &vaf); else - pr_notice("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + pr_notice("[0000:00:00.0]:[%s:%d]: %pV", func, line, &vaf);
ret: va_end(va); @@ -100,10 +88,6 @@ qedi_dbg_info(struct qedi_dbg_ctx *qedi, const char *func, u32 line, { va_list va; struct va_format vaf; - char nfunc[32]; - - memset(nfunc, 0, sizeof(nfunc)); - memcpy(nfunc, func, sizeof(nfunc) - 1);
va_start(va, fmt);
@@ -115,9 +99,9 @@ qedi_dbg_info(struct qedi_dbg_ctx *qedi, const char *func, u32 line,
if (likely(qedi) && likely(qedi->pdev)) pr_info("[%s]:[%s:%d]:%d: %pV", dev_name(&qedi->pdev->dev), - nfunc, line, qedi->host_no, &vaf); + func, line, qedi->host_no, &vaf); else - pr_info("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + pr_info("[0000:00:00.0]:[%s:%d]: %pV", func, line, &vaf);
ret: va_end(va);
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit d0adee5d12752256ff0c87ad7f002f21fe49d618 ]
Fixes gcc '-Wunused-but-set-variable' warning:
drivers/scsi/qedi/qedi_iscsi.c: In function 'qedi_ep_connect': drivers/scsi/qedi/qedi_iscsi.c:813:23: warning: variable 'udev' set but not used [-Wunused-but-set-variable] drivers/scsi/qedi/qedi_iscsi.c:812:18: warning: variable 'cdev' set but not used [-Wunused-but-set-variable]
These have never been used since introduction.
Signed-off-by: YueHaibing yuehaibing@huawei.com Acked-by: Manish Rangankar mrangankar@marvell.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qedi/qedi_iscsi.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 0b7267e68336..94f3829b1974 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -817,8 +817,6 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, struct qedi_endpoint *qedi_ep; struct sockaddr_in *addr; struct sockaddr_in6 *addr6; - struct qed_dev *cdev = NULL; - struct qedi_uio_dev *udev = NULL; struct iscsi_path path_req; u32 msg_type = ISCSI_KEVENT_IF_DOWN; u32 iscsi_cid = QEDI_CID_RESERVED; @@ -838,8 +836,6 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, }
qedi = iscsi_host_priv(shost); - cdev = qedi->cdev; - udev = qedi->udev;
if (test_bit(QEDI_IN_OFFLINE, &qedi->flags) || test_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
From: James Smart jsmart2021@gmail.com
[ Upstream commit c8cb261a072c88ca1aff0e804a30db4c7606521b ]
There was a missing qualification of a valid ndlp structure when calling to send an RRQ for an abort. Add the check.
Signed-off-by: Dick Kennedy dick.kennedy@broadcom.com Signed-off-by: James Smart jsmart2021@gmail.com Tested-by: Bart Van Assche bvanassche@acm.org Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/lpfc/lpfc_els.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 57cddbc4a977..ddd29752d96d 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7065,7 +7065,10 @@ int lpfc_send_rrq(struct lpfc_hba *phba, struct lpfc_node_rrq *rrq) { struct lpfc_nodelist *ndlp = lpfc_findnode_did(rrq->vport, - rrq->nlp_DID); + rrq->nlp_DID); + if (!ndlp) + return 1; + if (lpfc_test_rrq_active(phba, ndlp, rrq->xritag)) return lpfc_issue_els_rrq(rrq->vport, ndlp, rrq->nlp_DID, rrq);
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit 7ba36eccb3f83983a651efd570b4f933ecad1b5c ]
The arm64 ptdump code can race with concurrent modification of the kernel page tables. At the time this was added, this was sound as:
* Modifications to leaf entries could result in stale information being logged, but would not result in a functional problem.
* Boot time modifications to non-leaf entries (e.g. freeing of initmem) were performed when the ptdump code cannot be invoked.
* At runtime, modifications to non-leaf entries only occurred in the vmalloc region, and these were strictly additive, as intermediate entries were never freed.
However, since commit:
commit 324420bf91f6 ("arm64: add support for ioremap() block mappings")
... it has been possible to create huge mappings in the vmalloc area at runtime, and as part of this existing intermediate levels of table my be removed and freed.
It's possible for the ptdump code to race with this, and continue to walk tables which have been freed (and potentially poisoned or reallocated). As a result of this, the ptdump code may dereference bogus addresses, which could be fatal.
Since huge-vmap is a TLB and memory optimization, we can disable it when the runtime ptdump code is in use to avoid this problem.
Cc: Catalin Marinas catalin.marinas@arm.com Fixes: 324420bf91f60582 ("arm64: add support for ioremap() block mappings") Acked-by: Ard Biesheuvel ard.biesheuvel@arm.com Signed-off-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Anshuman Khandual anshuman.khandual@arm.com Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/mm/mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6ac0d32d60a5..abb9d2ecc675 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -899,13 +899,18 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
int __init arch_ioremap_pud_supported(void) { - /* only 4k granule supports level 1 block mappings */ - return IS_ENABLED(CONFIG_ARM64_4K_PAGES); + /* + * Only 4k granule supports level 1 block mappings. + * SW table walks can't handle removal of intermediate entries. + */ + return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); }
int __init arch_ioremap_pmd_supported(void) { - return 1; + /* See arch_ioremap_pud_supported() */ + return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); }
int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
From: Christoph Hellwig hch@lst.de
[ Upstream commit 3f98bcc58cd5f1e4668db289dcab771874cc0920 ]
We already have a proper stub if lightnvm is not enabled, so don't bother with the ifdef.
Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: Keith Busch keith.busch@intel.com Reviewed-by: Chaitanya Kulkarni chaitanya.kulkarni@wdc.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 65f3f1a34b6b..d98ffb1ce629 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1042,10 +1042,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, (void __user *)arg); default: -#ifdef CONFIG_NVM if (ns->ndev) return nvme_nvm_ioctl(ns, cmd, arg); -#endif if (is_sed_ioctl(cmd)) return sed_ioctl(ns->ctrl->opal_dev, cmd, (void __user *) arg);
From: Hans de Goede hdegoede@redhat.com
[ Upstream commit 3d0818f5eba80fbe4c0addbfe6ddb2d19dc82cd4 ]
The Lex 3I380D industrial PC has 4 ethernet controllers on board which need pmc_plt_clk0 - 3 to function, add it to the critclk_systems DMI table, so that drivers/clk/x86/clk-pmc-atom.c will mark the clocks as CLK_CRITICAL and they will not get turned off.
Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Reported-and-tested-by: Semyon Verchenko semverchenko@factor-ts.ru Signed-off-by: Hans de Goede hdegoede@redhat.com Acked-by: Andy Shevchenko andy.shevchenko@gmail.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/pmc_atom.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 50f2a125cd2c..e3e1f83a2dd7 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -428,12 +428,21 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc) */ static const struct dmi_system_id critclk_systems[] = { { + /* pmc_plt_clk0 is used for an external HSIC USB HUB */ .ident = "MPL CEC1x", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"), DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 Family"), }, }, + { + /* pmc_plt_clk0 - 3 are used for the 4 ethernet controllers */ + .ident = "Lex 3I380D", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lex BayTrail"), + DMI_MATCH(DMI_PRODUCT_NAME, "3I380D"), + }, + }, { /*sentinel*/ } };
From: Steffen Dirkwinkel s.dirkwinkel@beckhoff.com
[ Upstream commit d6423bd03031c020121da26c41a26bd5cc6d0da3 ]
There are several Beckhoff Automation industrial PC boards which use pmc_plt_clk* clocks for ethernet controllers. This adds affected boards to critclk_systems DMI table so the clocks are marked as CLK_CRITICAL and not turned off.
Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Signed-off-by: Steffen Dirkwinkel s.dirkwinkel@beckhoff.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/pmc_atom.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+)
diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index e3e1f83a2dd7..d4d089c37944 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -443,6 +443,30 @@ static const struct dmi_system_id critclk_systems[] = { DMI_MATCH(DMI_PRODUCT_NAME, "3I380D"), }, }, + { + /* pmc_plt_clk* - are used for ethernet controllers */ + .ident = "Beckhoff CB3163", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff Automation"), + DMI_MATCH(DMI_BOARD_NAME, "CB3163"), + }, + }, + { + /* pmc_plt_clk* - are used for ethernet controllers */ + .ident = "Beckhoff CB6263", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff Automation"), + DMI_MATCH(DMI_BOARD_NAME, "CB6263"), + }, + }, + { + /* pmc_plt_clk* - are used for ethernet controllers */ + .ident = "Beckhoff CB6363", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff Automation"), + DMI_MATCH(DMI_BOARD_NAME, "CB6363"), + }, + }, { /*sentinel*/ } };
From: Colin Ian King colin.king@canonical.com
[ Upstream commit d0c0d902339249c75da85fd9257a86cbb98dfaa5 ]
Currently an int is being shifted and the result is being cast to a u64 which leads to undefined behaviour if the shift is more than 31 bits. Fix this by casting the integer value 1 to u64 before the shift operation.
Addresses-Coverity: ("Bad shift operation") Fixes: 7b594769120b ("[SCSI] bnx2fc: Handle REC_TOV error code from firmware") Signed-off-by: Colin Ian King colin.king@canonical.com Acked-by: Saurav Kashyap skashyap@marvell.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/bnx2fc/bnx2fc_hwi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index 26de61d65a4d..1bdff75ae903 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -830,7 +830,7 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe) ((u64)err_entry->data.err_warn_bitmap_hi << 32) | (u64)err_entry->data.err_warn_bitmap_lo; for (i = 0; i < BNX2FC_NUM_ERR_BITS; i++) { - if (err_warn_bit_map & (u64) (1 << i)) { + if (err_warn_bit_map & ((u64)1 << i)) { err_warn = i; break; }
From: Qian Cai cai@lca.pw
[ Upstream commit c01dafad77fea8d64c4fdca0a6031c980842ad65 ]
Several places (dimm_devs.c, core.c etc) include label.h but only label.c uses NSINDEX_SIGNATURE, so move its definition to label.c instead.
In file included from drivers/nvdimm/dimm_devs.c:23: drivers/nvdimm/label.h:41:19: warning: 'NSINDEX_SIGNATURE' defined but not used [-Wunused-const-variable=]
Also, some places abuse "/**" which is only reserved for the kernel-doc.
drivers/nvdimm/bus.c:648: warning: cannot understand function prototype: 'struct attribute_group nd_device_attribute_group = ' drivers/nvdimm/bus.c:677: warning: cannot understand function prototype: 'struct attribute_group nd_numa_attribute_group = '
Those are just some member assignments for the "struct attribute_group" instances and it can't be expressed in the kernel-doc.
Reviewed-by: Vishal Verma vishal.l.verma@intel.com Signed-off-by: Qian Cai cai@lca.pw Signed-off-by: Dan Williams dan.j.williams@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvdimm/bus.c | 4 ++-- drivers/nvdimm/label.c | 2 ++ drivers/nvdimm/label.h | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index a6746a1f20ae..2f1b54fab399 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -608,7 +608,7 @@ static struct attribute *nd_device_attributes[] = { NULL, };
-/** +/* * nd_device_attribute_group - generic attributes for all devices on an nd bus */ struct attribute_group nd_device_attribute_group = { @@ -637,7 +637,7 @@ static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a, return a->mode; }
-/** +/* * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus */ struct attribute_group nd_numa_attribute_group = { diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 6a16017cc0d9..1fb3a2491131 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -25,6 +25,8 @@ static guid_t nvdimm_btt2_guid; static guid_t nvdimm_pfn_guid; static guid_t nvdimm_dax_guid;
+static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + static u32 best_seq(u32 a, u32 b) { a &= NSINDEX_SEQ_MASK; diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index 1ebf4d3d01ba..9ed772db6900 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -38,8 +38,6 @@ enum { ND_NSINDEX_INIT = 0x1, };
-static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; - /** * struct nd_namespace_index - label set superblock * @sig: NAMESPACE_INDEX\0
From: Kees Cook keescook@chromium.org
[ Upstream commit fe48319243a626c860fd666ca032daacc2ba84a5 ]
When running under a pipe, some timer tests would not report output in real-time because stdout flushes were missing after printf()s that lacked a newline. This adds them to restore real-time status output that humans can enjoy.
Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/timers/adjtick.c | 1 + tools/testing/selftests/timers/leapcrash.c | 1 + tools/testing/selftests/timers/mqueue-lat.c | 1 + tools/testing/selftests/timers/nanosleep.c | 1 + tools/testing/selftests/timers/nsleep-lat.c | 1 + tools/testing/selftests/timers/raw_skew.c | 1 + tools/testing/selftests/timers/set-tai.c | 1 + tools/testing/selftests/timers/set-tz.c | 2 ++ tools/testing/selftests/timers/threadtest.c | 1 + tools/testing/selftests/timers/valid-adjtimex.c | 2 ++ 10 files changed, 12 insertions(+)
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c index 0caca3a06bd2..54d8d87f36b3 100644 --- a/tools/testing/selftests/timers/adjtick.c +++ b/tools/testing/selftests/timers/adjtick.c @@ -136,6 +136,7 @@ int check_tick_adj(long tickval)
eppm = get_ppm_drift(); printf("%lld usec, %lld ppm", systick + (systick * eppm / MILLION), eppm); + fflush(stdout);
tx1.modes = 0; adjtimex(&tx1); diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c index 830c462f605d..dc80728ed191 100644 --- a/tools/testing/selftests/timers/leapcrash.c +++ b/tools/testing/selftests/timers/leapcrash.c @@ -101,6 +101,7 @@ int main(void) } clear_time_state(); printf("."); + fflush(stdout); } printf("[OK]\n"); return ksft_exit_pass(); diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c index 1867db5d6f5e..7916cf5cc6ff 100644 --- a/tools/testing/selftests/timers/mqueue-lat.c +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -102,6 +102,7 @@ int main(int argc, char **argv) int ret;
printf("Mqueue latency : "); + fflush(stdout);
ret = mqueue_lat_test(); if (ret < 0) { diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index 8adb0bb51d4d..71b5441c2fd9 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -142,6 +142,7 @@ int main(int argc, char **argv) continue;
printf("Nanosleep %-31s ", clockstring(clockid)); + fflush(stdout);
length = 10; while (length <= (NSEC_PER_SEC * 10)) { diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c index c3c3dc10db17..eb3e79ed7b4a 100644 --- a/tools/testing/selftests/timers/nsleep-lat.c +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -155,6 +155,7 @@ int main(int argc, char **argv) continue;
printf("nsleep latency %-26s ", clockstring(clockid)); + fflush(stdout);
length = 10; while (length <= (NSEC_PER_SEC * 10)) { diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index dcf73c5dab6e..b41d8dd0c40c 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -112,6 +112,7 @@ int main(int argv, char **argc) printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
printf("Estimating clock drift: "); + fflush(stdout); sleep(120);
get_monotonic_and_raw(&mon, &raw); diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c index 70fed27d8fd3..8c4179ee2ca2 100644 --- a/tools/testing/selftests/timers/set-tai.c +++ b/tools/testing/selftests/timers/set-tai.c @@ -55,6 +55,7 @@ int main(int argc, char **argv) printf("tai offset started at %i\n", ret);
printf("Checking tai offsets can be properly set: "); + fflush(stdout); for (i = 1; i <= 60; i++) { ret = set_tai(i); ret = get_tai(); diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c index 877fd5532fee..62bd33eb16f0 100644 --- a/tools/testing/selftests/timers/set-tz.c +++ b/tools/testing/selftests/timers/set-tz.c @@ -65,6 +65,7 @@ int main(int argc, char **argv) printf("tz_minuteswest started at %i, dst at %i\n", min, dst);
printf("Checking tz_minuteswest can be properly set: "); + fflush(stdout); for (i = -15*60; i < 15*60; i += 30) { ret = set_tz(i, dst); ret = get_tz_min(); @@ -76,6 +77,7 @@ int main(int argc, char **argv) printf("[OK]\n");
printf("Checking invalid tz_minuteswest values are caught: "); + fflush(stdout);
if (!set_tz(-15*60-1, dst)) { printf("[FAILED] %i didn't return failure!\n", -15*60-1); diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c index 759c9c06f1a0..cf3e48919874 100644 --- a/tools/testing/selftests/timers/threadtest.c +++ b/tools/testing/selftests/timers/threadtest.c @@ -163,6 +163,7 @@ int main(int argc, char **argv) strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start)); printf("%s\n", buf); printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime); + fflush(stdout);
/* spawn */ for (i = 0; i < thread_count; i++) diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index d9d3ab93b31a..5397de708d3c 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -123,6 +123,7 @@ int validate_freq(void) /* Set the leap second insert flag */
printf("Testing ADJ_FREQ... "); + fflush(stdout); for (i = 0; i < NUM_FREQ_VALID; i++) { tx.modes = ADJ_FREQUENCY; tx.freq = valid_freq[i]; @@ -250,6 +251,7 @@ int set_bad_offset(long sec, long usec, int use_nano) int validate_set_offset(void) { printf("Testing ADJ_SETOFFSET... "); + fflush(stdout);
/* Test valid values */ if (set_offset(NSEC_PER_SEC - 1, 1))
From: Bernd Eckstein 3erndeckstein@gmail.com
[ Upstream commit 94d250fae48e6f873d8362308f5c4d02cd1b1fd2 ]
Fix a racing condition in ipheth.c that can lead to slow performance.
Bug: In ipheth_tx(), netif_wake_queue() may be called on the callback ipheth_sndbulk_callback(), _before_ netif_stop_queue() is called. When this happens, the queue is stopped longer than it needs to be, thus reducing network performance.
Fix: Move netif_stop_queue() in front of usb_submit_urb(). Now the order is always correct. In case, usb_submit_urb() fails, the queue is woken up again as callback will not fire.
Testing: This racing condition is usually not noticeable, as it has to occur very frequently to slowdown the network. The callback from the USB is usually triggered slow enough, so the situation does not appear. However, on a Ubuntu Linux on VMWare Workstation, running on Windows 10, the we loose the race quite often and the following speedup can be noticed:
Without this patch: Download: 4.10 Mbit/s, Upload: 4.01 Mbit/s With this patch: Download: 36.23 Mbit/s, Upload: 17.61 Mbit/s
Signed-off-by: Oliver Zweigle Oliver.Zweigle@faro.com Signed-off-by: Bernd Eckstein 3ernd.Eckstein@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/usb/ipheth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 3d8a70d3ea9b..3d71f1716390 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -437,17 +437,18 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) dev); dev->tx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+ netif_stop_queue(net); retval = usb_submit_urb(dev->tx_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", __func__, retval); dev->net->stats.tx_errors++; dev_kfree_skb_any(skb); + netif_wake_queue(net); } else { dev->net->stats.tx_packets++; dev->net->stats.tx_bytes += skb->len; dev_consume_skb_any(skb); - netif_stop_queue(net); }
return NETDEV_TX_OK;
From: Kloetzke Jan Jan.Kloetzke@preh.de
[ Upstream commit ad70411a978d1e6e97b1e341a7bde9a79af0c93d ]
When disconnecting cdc_ncm the kernel sporadically crashes shortly after the disconnect:
[ 57.868812] Unable to handle kernel NULL pointer dereference at virtual address 00000000 ... [ 58.006653] PC is at 0x0 [ 58.009202] LR is at call_timer_fn+0xec/0x1b4 [ 58.013567] pc : [<0000000000000000>] lr : [<ffffff80080f5130>] pstate: 00000145 [ 58.020976] sp : ffffff8008003da0 [ 58.024295] x29: ffffff8008003da0 x28: 0000000000000001 [ 58.029618] x27: 000000000000000a x26: 0000000000000100 [ 58.034941] x25: 0000000000000000 x24: ffffff8008003e68 [ 58.040263] x23: 0000000000000000 x22: 0000000000000000 [ 58.045587] x21: 0000000000000000 x20: ffffffc68fac1808 [ 58.050910] x19: 0000000000000100 x18: 0000000000000000 [ 58.056232] x17: 0000007f885aff8c x16: 0000007f883a9f10 [ 58.061556] x15: 0000000000000001 x14: 000000000000006e [ 58.066878] x13: 0000000000000000 x12: 00000000000000ba [ 58.072201] x11: ffffffc69ff1db30 x10: 0000000000000020 [ 58.077524] x9 : 8000100008001000 x8 : 0000000000000001 [ 58.082847] x7 : 0000000000000800 x6 : ffffff8008003e70 [ 58.088169] x5 : ffffffc69ff17a28 x4 : 00000000ffff138b [ 58.093492] x3 : 0000000000000000 x2 : 0000000000000000 [ 58.098814] x1 : 0000000000000000 x0 : 0000000000000000 ... [ 58.205800] [< (null)>] (null) [ 58.210521] [<ffffff80080f5298>] expire_timers+0xa0/0x14c [ 58.215937] [<ffffff80080f542c>] run_timer_softirq+0xe8/0x128 [ 58.221702] [<ffffff8008081120>] __do_softirq+0x298/0x348 [ 58.227118] [<ffffff80080a6304>] irq_exit+0x74/0xbc [ 58.232009] [<ffffff80080e17dc>] __handle_domain_irq+0x78/0xac [ 58.237857] [<ffffff8008080cf4>] gic_handle_irq+0x80/0xac ...
The crash happens roughly 125..130ms after the disconnect. This correlates with the 'delay' timer that is started on certain USB tx/rx errors in the URB completion handler.
The problem is a race of usbnet_stop() with usbnet_start_xmit(). In usbnet_stop() we call usbnet_terminate_urbs() to cancel all URBs in flight. This only makes sense if no new URBs are submitted concurrently, though. But the usbnet_start_xmit() can run at the same time on another CPU which almost unconditionally submits an URB. The error callback of the new URB will then schedule the timer after it was already stopped.
The fix adds a check if the tx queue is stopped after the tx list lock has been taken. This should reliably prevent the submission of new URBs while usbnet_terminate_urbs() does its job. The same thing is done on the rx side even though it might be safe due to other flags that are checked there.
Signed-off-by: Jan Klötzke Jan.Kloetzke@preh.de Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/usb/usbnet.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 32fc69539126..831a9cec700c 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -508,6 +508,7 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
if (netif_running (dev->net) && netif_device_present (dev->net) && + test_bit(EVENT_DEV_OPEN, &dev->flags) && !test_bit (EVENT_RX_HALT, &dev->flags) && !test_bit (EVENT_DEV_ASLEEP, &dev->flags)) { switch (retval = usb_submit_urb (urb, GFP_ATOMIC)) { @@ -1433,6 +1434,11 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, spin_unlock_irqrestore(&dev->txq.lock, flags); goto drop; } + if (netif_queue_stopped(net)) { + usb_autopm_put_interface_async(dev->intf); + spin_unlock_irqrestore(&dev->txq.lock, flags); + goto drop; + }
#ifdef CONFIG_PM /* if this triggers the device is still a sleep */
From: Paolo Bonzini pbonzini@redhat.com
[ Upstream commit 2924b52117b2812e9633d5ea337333299166d373 ]
According to the SDM, for MSR_IA32_PERFCTR0/1 "the lower-order 32 bits of each MSR may be written with any value, and the high-order 8 bits are sign-extended according to the value of bit 31", but the fixed counters in real hardware are limited to the width of the fixed counters ("bits beyond the width of the fixed-function counter are reserved and must be written as zeros"). Fix KVM to do the same.
Reported-by: Nadav Amit nadav.amit@gmail.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/pmu_intel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index 5ab4a364348e..2729131fe9bf 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -235,11 +235,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - if (!msr_info->host_initiated) - data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + if (msr_info->host_initiated) + pmc->counter = data; + else + pmc->counter = (s32)data; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + pmc->counter = data; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel)
From: Christian Borntraeger borntraeger@de.ibm.com
[ Upstream commit 19ec166c3f39fe1d3789888a74cc95544ac266d4 ]
kselftests exposed a problem in the s390 handling for memory slots. Right now we only do proper memory slot handling for creation of new memory slots. Neither MOVE, nor DELETION are handled properly. Let us implement those.
Signed-off-by: Christian Borntraeger borntraeger@de.ibm.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kvm/kvm-s390.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4f6adbea592b..66b7b6383d8b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3912,21 +3912,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - int rc; - - /* If the basics of the memslot do not change, we do not want - * to update the gmap. Every update causes several unnecessary - * segment translation exceptions. This is usually handled just - * fine by the normal fault handler + gmap, but it will also - * cause faults on the prefix page of running guest CPUs. - */ - if (old->userspace_addr == mem->userspace_addr && - old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && - old->npages * PAGE_SIZE == mem->memory_size) - return; + int rc = 0;
- rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, - mem->guest_phys_addr, mem->memory_size); + switch (change) { + case KVM_MR_DELETE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + break; + case KVM_MR_MOVE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + if (rc) + break; + /* FALLTHROUGH */ + case KVM_MR_CREATE: + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + break; + case KVM_MR_FLAGS_ONLY: + break; + default: + WARN(1, "Unknown KVM MR CHANGE: %d\n", change); + } if (rc) pr_warn("failed to commit memory region\n"); return;
linux-stable-mirror@lists.linaro.org