From: Ruslan Babayev ruslan@babayev.com
[ Upstream commit 60f2208699ec08ff9fdf1f97639a661a92a18f1c ]
The ds4424_get_value function takes the channel number as its 3rd argument and translates it internally into an I2C address using the DS4424_DAC_ADDR macro. The caller ds4424_verify_chip was passing an already translated I2C address as its last argument.
Signed-off-by: Ruslan Babayev ruslan@babayev.com Cc: xe-linux-external@cisco.com Cc: Stable@vger.kernel.org Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iio/dac/ds4424.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/dac/ds4424.c b/drivers/iio/dac/ds4424.c index 883a47562055..714a97f91319 100644 --- a/drivers/iio/dac/ds4424.c +++ b/drivers/iio/dac/ds4424.c @@ -166,7 +166,7 @@ static int ds4424_verify_chip(struct iio_dev *indio_dev) { int ret, val;
- ret = ds4424_get_value(indio_dev, &val, DS4424_DAC_ADDR(0)); + ret = ds4424_get_value(indio_dev, &val, 0); if (ret < 0) dev_err(&indio_dev->dev, "%s failed. ret: %d\n", __func__, ret);
From: Sean Nyekjaer sean@geanix.com
[ Upstream commit e6d12298310fa1dc11f1d747e05b168016057fdd ]
When using the hrtimer iio trigger, the timestamp isn't updated. If we use iio_get_time_ns() instead, it is updated correctly.
Fixes: 2a86487786b5c ("iio: adc: ti-ads8688: add trigger and buffer support") Signed-off-by: Sean Nyekjaer sean@geanix.com Cc: Stable@vger.kernel.org Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iio/adc/ti-ads8688.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 8b4568edd5cb..7f16c77b99fb 100644 --- a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -397,7 +397,7 @@ static irqreturn_t ads8688_trigger_handler(int irq, void *p) }
iio_push_to_buffers_with_timestamp(indio_dev, buffer, - pf->timestamp); + iio_get_time_ns(indio_dev));
iio_trigger_notify_done(indio_dev->trig);
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit ca4e4efbefbbdde0a7bb3023ea08d491f4daf9b9 ]
These are accidentally returning positive EINVAL instead of negative -EINVAL. Some of the callers treat positive values as success.
Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Acked-by: Stefan Wahren stefan.wahren@i2se.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/vc04_services/bcm2835-camera/controls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/vc04_services/bcm2835-camera/controls.c b/drivers/staging/vc04_services/bcm2835-camera/controls.c index cff7b1e07153..b688ebc01740 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/controls.c +++ b/drivers/staging/vc04_services/bcm2835-camera/controls.c @@ -576,7 +576,7 @@ static int ctrl_set_image_effect(struct bm2835_mmal_dev *dev, dev->colourfx.enable ? "true" : "false", dev->colourfx.u, dev->colourfx.v, ret, (ret == 0 ? 0 : -EINVAL)); - return (ret == 0 ? 0 : EINVAL); + return (ret == 0 ? 0 : -EINVAL); }
static int ctrl_set_colfx(struct bm2835_mmal_dev *dev, @@ -600,7 +600,7 @@ static int ctrl_set_colfx(struct bm2835_mmal_dev *dev, "%s: After: mmal_ctrl:%p ctrl id:0x%x ctrl val:%d ret %d(%d)\n", __func__, mmal_ctrl, ctrl->id, ctrl->val, ret, (ret == 0 ? 0 : -EINVAL)); - return (ret == 0 ? 0 : EINVAL); + return (ret == 0 ? 0 : -EINVAL); }
static int ctrl_set_bitrate(struct bm2835_mmal_dev *dev,
From: Stephane Eranian eranian@google.com
[ Upstream commit 23e3983a466cd540ffdd2bbc6e0c51e31934f941 ]
This patch fixes a bug revealed by the following commit:
6b89d4c1ae85 ("perf/x86/intel: Fix INTEL_FLAGS_EVENT_CONSTRAINT* masking")
That patch modified INTEL_FLAGS_EVENT_CONSTRAINT() to only look at the event code when matching a constraint. If code+umask were needed, then the INTEL_FLAGS_UEVENT_CONSTRAINT() macro was needed instead. This broke with some of the constraints for PEBS events.
Several of them, including the one used for cycles:p, cycles:pp, cycles:ppp fell in that category and caused the event to be rejected in PEBS mode. In other words, on some platforms a cmdline such as:
$ perf top -e cycles:pp
would fail with -EINVAL.
This patch fixes this bug by properly using INTEL_FLAGS_UEVENT_CONSTRAINT() when needed in the PEBS constraint tables.
Reported-by: Ingo Molnar mingo@kernel.org Signed-off-by: Stephane Eranian eranian@google.com Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Arnaldo Carvalho de Melo acme@redhat.com Cc: Jiri Olsa jolsa@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Vince Weaver vincent.weaver@maine.edu Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/20190521005246.423-1-eranian@google.com Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/events/intel/ds.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index b7b01d762d32..e91814d1a27f 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -684,7 +684,7 @@ struct event_constraint intel_core2_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), EVENT_CONSTRAINT_END };
@@ -693,7 +693,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END @@ -701,7 +701,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
struct event_constraint intel_slm_pebs_event_constraints[] = { /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1), /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END @@ -726,7 +726,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), EVENT_CONSTRAINT_END };
@@ -743,7 +743,7 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = { INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), EVENT_CONSTRAINT_END };
@@ -752,7 +752,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -767,9 +767,9 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -783,9 +783,9 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). 
*/ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ @@ -806,9 +806,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ @@ -829,9 +829,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = { struct event_constraint intel_skl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
From: Jagdish Motwani jagdish.motwani@sophos.com
[ Upstream commit 946c0d8e6ed43dae6527e878d0077c1e11015db0 ]
This patch fixes netfilter hook traversal when there is more than one hook returning an NF_QUEUE verdict. When the first queue reinjects the packet, 'nf_reinject' starts traversing hooks with a proper hook_index. However, if it again receives an NF_QUEUE verdict (from some other netfilter hook), it queues the packet with a wrong hook_index. So, when the second queue reinjects the packet, it re-executes the hooks in between.
Fixes: 960632ece694 ("netfilter: convert hook list to an array") Signed-off-by: Jagdish Motwani jagdish.motwani@sophos.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/nf_queue.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d67a96a25a68..7569ba00e732 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -238,6 +238,7 @@ static unsigned int nf_iterate(struct sk_buff *skb, repeat: verdict = nf_hook_entry_hookfn(hook, skb, state); if (verdict != NF_ACCEPT) { + *index = i; if (verdict != NF_REPEAT) return verdict; goto repeat;
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit 719c7d563c17b150877cee03a4b812a424989dfa ]
BUG: KASAN: use-after-free in ip_vs_in.part.29+0xe8/0xd20 [ip_vs] Read of size 4 at addr ffff8881e9b26e2c by task sshd/5603
CPU: 0 PID: 5603 Comm: sshd Not tainted 4.19.39+ #30 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x270 kasan_report+0x179/0x2c0 ip_vs_in.part.29+0xe8/0xd20 [ip_vs] ip_vs_in+0xd8/0x170 [ip_vs] nf_hook_slow+0x5f/0xe0 __ip_local_out+0x1d5/0x250 ip_local_out+0x19/0x60 __tcp_transmit_skb+0xba1/0x14f0 tcp_write_xmit+0x41f/0x1ed0 ? _copy_from_iter_full+0xca/0x340 __tcp_push_pending_frames+0x52/0x140 tcp_sendmsg_locked+0x787/0x1600 ? tcp_sendpage+0x60/0x60 ? inet_sk_set_state+0xb0/0xb0 tcp_sendmsg+0x27/0x40 sock_sendmsg+0x6d/0x80 sock_write_iter+0x121/0x1c0 ? sock_sendmsg+0x80/0x80 __vfs_write+0x23e/0x370 vfs_write+0xe7/0x230 ksys_write+0xa1/0x120 ? __ia32_sys_read+0x50/0x50 ? __audit_syscall_exit+0x3ce/0x450 do_syscall_64+0x73/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7ff6f6147c60 Code: 73 01 c3 48 8b 0d 28 12 2d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 5d 73 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 RSP: 002b:00007ffd772ead18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000034 RCX: 00007ff6f6147c60 RDX: 0000000000000034 RSI: 000055df30a31270 RDI: 0000000000000003 RBP: 000055df30a31270 R08: 0000000000000000 R09: 0000000000000000 R10: 00007ffd772ead70 R11: 0000000000000246 R12: 00007ffd772ead74 R13: 00007ffd772eae20 R14: 00007ffd772eae24 R15: 000055df2f12ddc0
Allocated by task 6052: kasan_kmalloc+0xa0/0xd0 __kmalloc+0x10a/0x220 ops_init+0x97/0x190 register_pernet_operations+0x1ac/0x360 register_pernet_subsys+0x24/0x40 0xffffffffc0ea016d do_one_initcall+0x8b/0x253 do_init_module+0xe3/0x335 load_module+0x2fc0/0x3890 __do_sys_finit_module+0x192/0x1c0 do_syscall_64+0x73/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Freed by task 6067: __kasan_slab_free+0x130/0x180 kfree+0x90/0x1a0 ops_free_list.part.7+0xa6/0xc0 unregister_pernet_operations+0x18b/0x1f0 unregister_pernet_subsys+0x1d/0x30 ip_vs_cleanup+0x1d/0xd2f [ip_vs] __x64_sys_delete_module+0x20c/0x300 do_syscall_64+0x73/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9
The buggy address belongs to the object at ffff8881e9b26600 which belongs to the cache kmalloc-4096 of size 4096 The buggy address is located 2092 bytes inside of 4096-byte region [ffff8881e9b26600, ffff8881e9b27600) The buggy address belongs to the page: page:ffffea0007a6c800 count:1 mapcount:0 mapping:ffff888107c0e600 index:0x0 compound_mapcount: 0 flags: 0x17ffffc0008100(slab|head) raw: 0017ffffc0008100 dead000000000100 dead000000000200 ffff888107c0e600 raw: 0000000000000000 0000000080070007 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected
While unregistering the ipvs module, ops_free_list calls __ip_vs_cleanup, and then nf_unregister_net_hooks is called to remove the nf hook entries. It needs an RCU grace period to finish; however, net->ipvs is set to NULL immediately, which will trigger a NULL pointer dereference when a packet is hooked and handled by ip_vs_in, where net->ipvs is dereferenced.
Another scenario is that ops_free_list calls ops_free to free the net_generic directly once __ip_vs_cleanup has finished; a subsequent call to ip_vs_in then triggers a use-after-free.
This patch moves nf_unregister_net_hooks from __ip_vs_cleanup() to __ip_vs_dev_cleanup(), where rcu_barrier() is called by unregister_pernet_device -> unregister_pernet_operations, that will do the needed grace period.
Reported-by: Hulk Robot hulkci@huawei.com Fixes: efe41606184e ("ipvs: convert to use pernet nf_hook api") Suggested-by: Julian Anastasov ja@ssi.bg Signed-off-by: YueHaibing yuehaibing@huawei.com Acked-by: Julian Anastasov ja@ssi.bg Signed-off-by: Simon Horman horms@verge.net.au Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a42c1bc7c698..62c0e80dcd71 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2280,7 +2280,6 @@ static void __net_exit __ip_vs_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net);
- nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ ip_vs_conn_net_cleanup(ipvs); ip_vs_app_net_cleanup(ipvs); @@ -2295,6 +2294,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); EnterFunction(2); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); ip_vs_sync_net_cleanup(ipvs);
From: Jeffrin Jose T jeffrin@rajagiritech.edu.in
[ Upstream commit 82ce6eb1dd13fd12e449b2ee2c2ec051e6f52c43 ]
A test for the basic NAT functionality uses the ip command, which needs a veth device. There is a condition where the kernel support for veth is not compiled into the kernel and the test script breaks. This patch contains code for a reasonable error display and a correct exit code.
Signed-off-by: Jeffrin Jose T jeffrin@rajagiritech.edu.in Acked-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/netfilter/nft_nat.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 8ec76681605c..f25f72a75cf3 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -23,7 +23,11 @@ ip netns add ns0 ip netns add ns1 ip netns add ns2
-ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
ip -net ns0 link set lo up
From: Tony Lindgren tony@atomide.com
[ Upstream commit 1cc54078d104f5b4d7e9f8d55362efa5a8daffdb ]
We need to always call clkdm_clk_enable() and clkdm_clk_disable() even if the clkctrl clock(s) enabled for the domain do not have any gate register bits. Otherwise clockdomains may never get enabled except when devices get probed with the legacy "ti,hwmods" devicetree property.
Fixes: 88a172526c32 ("clk: ti: add support for clkctrl clocks") Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Stephen Boyd sboyd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/ti/clkctrl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index 421b05392220..ca3218337fd7 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -137,9 +137,6 @@ static int _omap4_clkctrl_clk_enable(struct clk_hw *hw) int ret; union omap4_timeout timeout = { 0 };
- if (!clk->enable_bit) - return 0; - if (clk->clkdm) { ret = ti_clk_ll_ops->clkdm_clk_enable(clk->clkdm, hw->clk); if (ret) { @@ -151,6 +148,9 @@ static int _omap4_clkctrl_clk_enable(struct clk_hw *hw) } }
+ if (!clk->enable_bit) + return 0; + val = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
val &= ~OMAP4_MODULEMODE_MASK; @@ -179,7 +179,7 @@ static void _omap4_clkctrl_clk_disable(struct clk_hw *hw) union omap4_timeout timeout = { 0 };
if (!clk->enable_bit) - return; + goto exit;
val = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
From: Anju T Sudhakar anju@linux.vnet.ibm.com
[ Upstream commit b59bd3527fe3c1939340df558d7f9d568fc9f882 ]
Currently init_imc_pmu() can fail either because we try to register an IMC unit with an invalid domain (i.e an IMC node not supported by the kernel) or something went wrong while registering a valid IMC unit. In both the cases kernel provides a 'Register failed' error message.
For example when trace-imc node is not supported by the kernel, but skiboot advertises a trace-imc node we print:
IMC Unknown Device type IMC PMU (null) Register failed
To avoid confusion just print the unknown device type message, before attempting PMU registration, so the second message isn't printed.
Fixes: 8f95faaac56c ("powerpc/powernv: Detect and create IMC device") Reported-by: Pavaman Subramaniyam pavsubra@in.ibm.com Signed-off-by: Anju T Sudhakar anju@linux.vnet.ibm.com Reviewed-by: Madhavan Srinivasan maddy@linux.vnet.ibm.com [mpe: Reword change log a bit] Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/platforms/powernv/opal-imc.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 3d27f02695e4..828f6656f8f7 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -161,6 +161,10 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) struct imc_pmu *pmu_ptr; u32 offset;
+ /* Return for unknown domain */ + if (domain < 0) + return -EINVAL; + /* memory for pmu */ pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); if (!pmu_ptr)
From: Jia-Ju Bai baijiaju1990@gmail.com
[ Upstream commit 5bce256f0b528624a34fe907db385133bb7be33e ]
In xhci_debugfs_create_slot(), kzalloc() can fail and dev->debugfs_private will be NULL. In xhci_debugfs_create_endpoint(), dev->debugfs_private is used without any null-pointer check, and can cause a null pointer dereference.
To fix this bug, a null-pointer check is added in xhci_debugfs_create_endpoint().
This bug is found by a runtime fuzzing tool named FIZZER written by us.
[subject line change, add potential -Mathias] Signed-off-by: Jia-Ju Bai baijiaju1990@gmail.com Reviewed-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Mathias Nyman mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/host/xhci-debugfs.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index cadc01336bf8..7ba6afc7ef23 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -440,6 +440,9 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci, struct xhci_ep_priv *epriv; struct xhci_slot_priv *spriv = dev->debugfs_private;
+ if (!spriv) + return; + if (spriv->eps[ep_index]) return;
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit ccfb62f27beb295103e9392462b20a6ed807d0ea ]
The user can change the device_name with the IMSETDEVNAME ioctl, but we need to ensure that the user's name is NUL terminated. Otherwise it could result in a buffer overflow when we copy the name back to the user with IMGETDEVINFO ioctl.
I also changed two strcpy() calls which handle the name to strscpy(). Hopefully, there aren't any other ways to create a too long name, but it's nice to do this as a kernel hardening measure.
Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/isdn/mISDN/socket.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index b2abc44fa5cb..a73337b74f41 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -394,7 +394,7 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) memcpy(di.channelmap, dev->channelmap, sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; - strcpy(di.name, dev_name(&dev->dev)); + strscpy(di.name, dev_name(&dev->dev), sizeof(di.name)); if (copy_to_user((void __user *)arg, &di, sizeof(di))) err = -EFAULT; } else @@ -677,7 +677,7 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) memcpy(di.channelmap, dev->channelmap, sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; - strcpy(di.name, dev_name(&dev->dev)); + strscpy(di.name, dev_name(&dev->dev), sizeof(di.name)); if (copy_to_user((void __user *)arg, &di, sizeof(di))) err = -EFAULT; } else @@ -691,6 +691,7 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = -EFAULT; break; } + dn.name[sizeof(dn.name) - 1] = '\0'; dev = get_mdevice(dn.id); if (dev) err = device_rename(&dev->dev, dn.name);
From: Frank van der Linden fllinden@amazon.com
[ Upstream commit 2ac44ab608705948564791ce1d15d43ba81a1e38 ]
For F17h AMD CPUs, the CPB capability ('Core Performance Boost') is forcibly set, because some versions of that chip incorrectly report that they do not have it.
However, a hypervisor may filter out the CPB capability, for good reasons. For example, KVM currently does not emulate setting the CPB bit in MSR_K7_HWCR, and unchecked MSR access errors will be thrown when trying to set it as a guest:
unchecked MSR access error: WRMSR to 0xc0010015 (tried to write 0x0000000001000011) at rIP: 0xffffffff890638f4 (native_write_msr+0x4/0x20)
Call Trace: boost_set_msr+0x50/0x80 [acpi_cpufreq] cpuhp_invoke_callback+0x86/0x560 sort_range+0x20/0x20 cpuhp_thread_fun+0xb0/0x110 smpboot_thread_fn+0xef/0x160 kthread+0x113/0x130 kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x35/0x40
To avoid this issue, don't forcibly set the CPB capability for a CPU when running under a hypervisor.
Signed-off-by: Frank van der Linden fllinden@amazon.com Acked-by: Borislav Petkov bp@suse.de Cc: Andy Lutomirski luto@kernel.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: bp@alien8.de Cc: jiaxun.yang@flygoat.com Fixes: 0237199186e7 ("x86/CPU/AMD: Set the CPB bit unconditionally on F17h") Link: http://lkml.kernel.org/r/20190522221745.GA15789@dev-dsk-fllinden-2c-c1893d73... [ Minor edits to the changelog. ] Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/cpu/amd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6a25278e0092..da1f5e78363e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -819,8 +819,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN);
- /* Fix erratum 1076: CPB feature bit not being set in CPUID. */ - if (!cpu_has(c, X86_FEATURE_CPB)) + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. + * Always set it, except when running under a hypervisor. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); }
From: Yabin Cui yabinc@google.com
[ Upstream commit 1b038c6e05ff70a1e66e3e571c2e6106bdb75f53 ]
In perf_output_put_handle(), an IRQ/NMI can happen in below location and write records to the same ring buffer:
... local_dec_and_test(&rb->nest) ... <-- an IRQ/NMI can happen here rb->user_page->data_head = head; ...
In this case, a value A is written to data_head in the IRQ, then a value B is written to data_head after the IRQ, and A > B. As a result, data_head is temporarily decreased from A to B, and a reader may see data_head < data_tail if it reads the buffer frequently enough, which creates unexpected behavior.
This can be fixed by moving dec(&rb->nest) to after updating data_head, which prevents the IRQ/NMI above from updating data_head.
[ Split up by peterz. ]
Signed-off-by: Yabin Cui yabinc@google.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Arnaldo Carvalho de Melo acme@kernel.org Cc: Arnaldo Carvalho de Melo acme@redhat.com Cc: Jiri Olsa jolsa@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Namhyung Kim namhyung@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Stephane Eranian eranian@google.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Vince Weaver vincent.weaver@maine.edu Cc: mark.rutland@arm.com Fixes: ef60777c9abd ("perf: Optimize the perf_output() path by removing IRQ-disables") Link: http://lkml.kernel.org/r/20190517115418.224478157@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/events/ring_buffer.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 99c7f199f2d4..31edf1f39cca 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -52,11 +52,18 @@ static void perf_output_put_handle(struct perf_output_handle *handle) head = local_read(&rb->head);
/* - * IRQ/NMI can happen here, which means we can miss a head update. + * IRQ/NMI can happen here and advance @rb->head, causing our + * load above to be stale. */
- if (!local_dec_and_test(&rb->nest)) + /* + * If this isn't the outermost nesting, we don't have to update + * @rb->user_page->data_head. + */ + if (local_read(&rb->nest) > 1) { + local_dec(&rb->nest); goto out; + }
/* * Since the mmap() consumer (userspace) can run on a different CPU: @@ -88,9 +95,18 @@ static void perf_output_put_handle(struct perf_output_handle *handle) rb->user_page->data_head = head;
/* - * Now check if we missed an update -- rely on previous implied - * compiler barriers to force a re-read. + * We must publish the head before decrementing the nest count, + * otherwise an IRQ/NMI can publish a more recent head value and our + * write will (temporarily) publish a stale value. + */ + barrier(); + local_set(&rb->nest, 0); + + /* + * Ensure we decrement @rb->nest before we validate the @rb->head. + * Otherwise we cannot be sure we caught the 'last' nested update. */ + barrier(); if (unlikely(head != local_read(&rb->head))) { local_inc(&rb->nest); goto again;
From: Peter Zijlstra peterz@infradead.org
[ Upstream commit 3f9fbe9bd86c534eba2faf5d840fd44c6049f50e ]
Similar to how decrementing rb->nest too early can cause data_head to (temporarily) be observed to go backward, so too can this happen when we increment too late.
This barrier() ensures the rb->head load happens after the increment, both the one in the 'goto again' path and the one from perf_output_get_handle() -- albeit very unlikely to matter for the latter.
Suggested-by: Yabin Cui yabinc@google.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Arnaldo Carvalho de Melo acme@redhat.com Cc: Jiri Olsa jolsa@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Stephane Eranian eranian@google.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Vince Weaver vincent.weaver@maine.edu Cc: acme@kernel.org Cc: mark.rutland@arm.com Cc: namhyung@kernel.org Fixes: ef60777c9abd ("perf: Optimize the perf_output() path by removing IRQ-disables") Link: http://lkml.kernel.org/r/20190517115418.309516009@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/events/ring_buffer.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 31edf1f39cca..d32b9375ec0e 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -49,6 +49,15 @@ static void perf_output_put_handle(struct perf_output_handle *handle) unsigned long head;
again: + /* + * In order to avoid publishing a head value that goes backwards, + * we must ensure the load of @rb->head happens after we've + * incremented @rb->nest. + * + * Otherwise we can observe a @rb->head value before one published + * by an IRQ/NMI happening between the load and the increment. + */ + barrier(); head = local_read(&rb->head);
/*
From: Peter Zijlstra peterz@infradead.org
[ Upstream commit 4d839dd9e4356bbacf3eb0ab13a549b83b008c21 ]
We must use {READ,WRITE}_ONCE() on rb->user_page data such that concurrent usage will see whole values. A few key sites were missing this.
Suggested-by: Yabin Cui yabinc@google.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Arnaldo Carvalho de Melo acme@redhat.com Cc: Jiri Olsa jolsa@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Stephane Eranian eranian@google.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Vince Weaver vincent.weaver@maine.edu Cc: acme@kernel.org Cc: mark.rutland@arm.com Cc: namhyung@kernel.org Fixes: 7b732a750477 ("perf_counter: new output ABI - part 1") Link: http://lkml.kernel.org/r/20190517115418.394192145@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/events/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index d32b9375ec0e..12f351b253bb 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -101,7 +101,7 @@ static void perf_output_put_handle(struct perf_output_handle *handle) * See perf_output_begin(). */ smp_wmb(); /* B, matches C */ - rb->user_page->data_head = head; + WRITE_ONCE(rb->user_page->data_head, head);
/* * We must publish the head before decrementing the nest count, @@ -490,7 +490,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) handle->aux_flags); }
- rb->user_page->aux_head = rb->aux_head; + WRITE_ONCE(rb->user_page->aux_head, rb->aux_head); if (rb_need_aux_wakeup(rb)) wakeup = true;
@@ -522,7 +522,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
rb->aux_head += size;
- rb->user_page->aux_head = rb->aux_head; + WRITE_ONCE(rb->user_page->aux_head, rb->aux_head); if (rb_need_aux_wakeup(rb)) { perf_output_wakeup(handle); handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
From: Randy Dunlap rdunlap@infradead.org
[ Upstream commit e9646f0f5bb62b7d43f0968f39d536cfe7123b53 ]
The gpio-adp5588 driver uses interfaces that are provided by GPIOLIB_IRQCHIP, so select that symbol in its Kconfig entry.
Fixes these build errors:
../drivers/gpio/gpio-adp5588.c: In function ‘adp5588_irq_handler’: ../drivers/gpio/gpio-adp5588.c:266:26: error: ‘struct gpio_chip’ has no member named ‘irq’ dev->gpio_chip.irq.domain, gpio)); ^ ../drivers/gpio/gpio-adp5588.c: In function ‘adp5588_irq_setup’: ../drivers/gpio/gpio-adp5588.c:298:2: error: implicit declaration of function ‘gpiochip_irqchip_add_nested’ [-Werror=implicit-function-declaration] ret = gpiochip_irqchip_add_nested(&dev->gpio_chip, ^ ../drivers/gpio/gpio-adp5588.c:307:2: error: implicit declaration of function ‘gpiochip_set_nested_irqchip’ [-Werror=implicit-function-declaration] gpiochip_set_nested_irqchip(&dev->gpio_chip, ^
Fixes: 459773ae8dbb ("gpio: adp5588-gpio: support interrupt controller") Reported-by: kbuild test robot lkp@intel.com Signed-off-by: Randy Dunlap rdunlap@infradead.org Cc: linux-gpio@vger.kernel.org Reviewed-by: Bartosz Golaszewski bgolaszewski@baylibre.com Acked-by: Michael Hennerich michael.hennerich@analog.com Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpio/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 4f52c3a8ec99..ed51221621a5 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -784,6 +784,7 @@ config GPIO_ADP5588 config GPIO_ADP5588_IRQ bool "Interrupt controller support for ADP5588" depends on GPIO_ADP5588=y + select GPIOLIB_IRQCHIP help Say yes here to enable the adp5588 to be used as an interrupt controller. It requires the driver to be built in the kernel.
From: Gen Zhang blackgod016574@gmail.com
[ Upstream commit 4e78921ba4dd0aca1cc89168f45039add4183f8e ]
The old_memmap flow in efi_call_phys_prolog() performs numerous memory allocations, and either does not check for failure at all, or it does but fails to propagate it back to the caller, which may end up calling into the firmware with an incomplete 1:1 mapping.
So let's fix this by returning NULL from efi_call_phys_prolog() on memory allocation failures only, and by handling this condition in the caller. Also, clean up any half baked sets of page tables that we may have created before returning with a NULL return value.
Note that any failure at this level will trigger a panic() two levels up, so none of this makes a huge difference, but it is a nice cleanup nonetheless.
[ardb: update commit log, add efi_call_phys_epilog() call on error path]
Signed-off-by: Gen Zhang blackgod016574@gmail.com Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rob Bradford robert.bradford@intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190525112559.7917-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/platform/efi/efi.c | 2 ++ arch/x86/platform/efi/efi_64.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 9061babfbc83..353019d4e6c9 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -86,6 +86,8 @@ static efi_status_t __init phys_efi_set_virtual_address_map( pgd_t *save_pgd;
save_pgd = efi_call_phys_prolog(); + if (!save_pgd) + return EFI_ABORTED;
/* Disable interrupts around EFI calls: */ local_irq_save(flags); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ee5d08f25ce4..dfc809b31c7c 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -84,13 +84,15 @@ pgd_t * __init efi_call_phys_prolog(void)
if (!efi_enabled(EFI_OLD_MEMMAP)) { efi_switch_mm(&efi_mm); - return NULL; + return efi_mm.pgd; }
early_code_mapping_set_exec(1);
n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + if (!save_pgd) + return NULL;
/* * Build 1:1 identity mapping for efi=old_map usage. Note that @@ -138,10 +140,11 @@ pgd_t * __init efi_call_phys_prolog(void) pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; }
-out: __flush_tlb_all(); - return save_pgd; +out: + efi_call_phys_epilog(save_pgd); + return NULL; }
void __init efi_call_phys_epilog(pgd_t *save_pgd)
On Sat, 8 Jun 2019 at 13:43, Sasha Levin sashal@kernel.org wrote:
From: Gen Zhang blackgod016574@gmail.com
[ Upstream commit 4e78921ba4dd0aca1cc89168f45039add4183f8e ]
The old_memmap flow in efi_call_phys_prolog() performs numerous memory allocations, and either does not check for failure at all, or it does but fails to propagate it back to the caller, which may end up calling into the firmware with an incomplete 1:1 mapping.
So let's fix this by returning NULL from efi_call_phys_prolog() on memory allocation failures only, and by handling this condition in the caller. Also, clean up any half baked sets of page tables that we may have created before returning with a NULL return value.
Note that any failure at this level will trigger a panic() two levels up, so none of this makes a huge difference, but it is a nice cleanup nonetheless.
[ardb: update commit log, add efi_call_phys_epilog() call on error path]
Signed-off-by: Gen Zhang blackgod016574@gmail.com Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rob Bradford robert.bradford@intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190525112559.7917-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org
This was already discussed in the thread that proposed this patch for stable: please don't queue this right now, the patches are more likely to harm than help, and they certainly don't fix a security vulnerability, as has been claimed.
arch/x86/platform/efi/efi.c | 2 ++ arch/x86/platform/efi/efi_64.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 9061babfbc83..353019d4e6c9 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -86,6 +86,8 @@ static efi_status_t __init phys_efi_set_virtual_address_map( pgd_t *save_pgd;
save_pgd = efi_call_phys_prolog();
+ if (!save_pgd)
+ return EFI_ABORTED; /* Disable interrupts around EFI calls: */ local_irq_save(flags);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ee5d08f25ce4..dfc809b31c7c 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -84,13 +84,15 @@ pgd_t * __init efi_call_phys_prolog(void)
if (!efi_enabled(EFI_OLD_MEMMAP)) { efi_switch_mm(&efi_mm);
- return NULL;
+ return efi_mm.pgd; } early_code_mapping_set_exec(1); n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
+ if (!save_pgd)
+ return NULL; /* * Build 1:1 identity mapping for efi=old_map usage. Note that
@@ -138,10 +140,11 @@ pgd_t * __init efi_call_phys_prolog(void) pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; }
-out: __flush_tlb_all();
return save_pgd;
+out:
+ efi_call_phys_epilog(save_pgd);
+ return NULL;
}
void __init efi_call_phys_epilog(pgd_t *save_pgd)
2.20.1
On Sun, Jun 09, 2019 at 08:14:29PM +0200, Ard Biesheuvel wrote:
On Sat, 8 Jun 2019 at 13:43, Sasha Levin sashal@kernel.org wrote:
From: Gen Zhang blackgod016574@gmail.com
[ Upstream commit 4e78921ba4dd0aca1cc89168f45039add4183f8e ]
The old_memmap flow in efi_call_phys_prolog() performs numerous memory allocations, and either does not check for failure at all, or it does but fails to propagate it back to the caller, which may end up calling into the firmware with an incomplete 1:1 mapping.
So let's fix this by returning NULL from efi_call_phys_prolog() on memory allocation failures only, and by handling this condition in the caller. Also, clean up any half baked sets of page tables that we may have created before returning with a NULL return value.
Note that any failure at this level will trigger a panic() two levels up, so none of this makes a huge difference, but it is a nice cleanup nonetheless.
[ardb: update commit log, add efi_call_phys_epilog() call on error path]
Signed-off-by: Gen Zhang blackgod016574@gmail.com Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rob Bradford robert.bradford@intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190525112559.7917-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org
This was already discussed in the thread that proposed this patch for stable: please don't queue this right now, the patches are more likely to harm than help, and they certainly don't fix a security vulnerability, as has been claimed.
I've dropped this, thank you.
-- Thanks, Sasha
From: Biao Huang biao.huang@mediatek.com
[ Upstream commit 4523a5611526709ec9b4e2574f1bb7818212651e ]
Currently we do not update the receive descriptor tail pointer in stmmac_rx_refill. The Rx DMA will think there are no available descriptors and stop once the received packets exceed DMA_RX_SIZE, so the rx-only test will fail.
Update the receive tail pointer in stmmac_rx_refill to add more descriptors to the rx channel, so packets can be received continually.
Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx") Signed-off-by: Biao Huang biao.huang@mediatek.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 50c00822b2d8..45e64d71a93f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3319,6 +3319,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE); } rx_q->dirty_rx = entry; + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); }
/**
From: Kees Cook keescook@chromium.org
[ Upstream commit 3e66b7cc50ef921121babc91487e1fb98af1ba6e ]
Building with Clang reports the redundant use of MODULE_DEVICE_TABLE():
drivers/net/ethernet/dec/tulip/de4x5.c:2110:1: error: redefinition of '__mod_eisa__de4x5_eisa_ids_device_table' MODULE_DEVICE_TABLE(eisa, de4x5_eisa_ids); ^ ./include/linux/module.h:229:21: note: expanded from macro 'MODULE_DEVICE_TABLE' extern typeof(name) __mod_##type##__##name##_device_table \ ^ <scratch space>:90:1: note: expanded from here __mod_eisa__de4x5_eisa_ids_device_table ^ drivers/net/ethernet/dec/tulip/de4x5.c:2100:1: note: previous definition is here MODULE_DEVICE_TABLE(eisa, de4x5_eisa_ids); ^ ./include/linux/module.h:229:21: note: expanded from macro 'MODULE_DEVICE_TABLE' extern typeof(name) __mod_##type##__##name##_device_table \ ^ <scratch space>:85:1: note: expanded from here __mod_eisa__de4x5_eisa_ids_device_table ^
This drops the one further from the table definition to match the common use of MODULE_DEVICE_TABLE().
Fixes: 07563c711fbc ("EISA bus MODALIAS attributes support") Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/dec/tulip/de4x5.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 66535d1653f6..f16853c3c851 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -2107,7 +2107,6 @@ static struct eisa_driver de4x5_eisa_driver = { .remove = de4x5_eisa_remove, } }; -MODULE_DEVICE_TABLE(eisa, de4x5_eisa_ids); #endif
#ifdef CONFIG_PCI
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
[ Upstream commit 9a51c6b1f9e0239a9435db036b212498a2a3b75c ]
Both acpi_pci_need_resume() and acpi_dev_needs_resume() check if the current ACPI wakeup configuration of the device matches what is expected as far as system wakeup from sleep states is concerned, as reflected by the device_may_wakeup() return value for the device.
However, they only should do that if wakeup.flags.valid is set for the device's ACPI companion, because otherwise the wakeup.prepare_count value for it is meaningless.
Add the missing wakeup.flags.valid checks to these functions.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Reviewed-by: Mika Westerberg mika.westerberg@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/device_pm.c | 4 ++-- drivers/pci/pci-acpi.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index a7c2673ffd36..1806260938e8 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -948,8 +948,8 @@ static bool acpi_dev_needs_resume(struct device *dev, struct acpi_device *adev) u32 sys_target = acpi_target_system_state(); int ret, state;
- if (!pm_runtime_suspended(dev) || !adev || - device_may_wakeup(dev) != !!adev->wakeup.prepare_count) + if (!pm_runtime_suspended(dev) || !adev || (adev->wakeup.flags.valid && + device_may_wakeup(dev) != !!adev->wakeup.prepare_count)) return true;
if (sys_target == ACPI_STATE_S0) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index f8436d1c4d45..f7218c1673ce 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -625,7 +625,8 @@ static bool acpi_pci_need_resume(struct pci_dev *dev) if (!adev || !acpi_device_power_manageable(adev)) return false;
- if (device_may_wakeup(&dev->dev) != !!adev->wakeup.prepare_count) + if (adev->wakeup.flags.valid && + device_may_wakeup(&dev->dev) != !!adev->wakeup.prepare_count) return true;
if (acpi_target_system_state() == ACPI_STATE_S0)
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
[ Upstream commit d491f2b75237ef37d8867830ab7fad8d9659e853 ]
If a PCI driver leaves the device handled by it in D0 and calls pci_save_state() on the device in its ->suspend() or ->suspend_late() callback, it can expect the device to stay in D0 over the whole s2idle cycle. However, that may not be the case if there is a spurious wakeup while the system is suspended, because in that case pci_pm_suspend_noirq() will run again after pci_pm_resume_noirq() which calls pci_restore_state(), via pci_pm_default_resume_early(), so state_saved is cleared and the second iteration of pci_pm_suspend_noirq() will invoke pci_prepare_to_sleep() which may change the power state of the device.
To avoid that, add a new internal flag, skip_bus_pm, that will be set by pci_pm_suspend_noirq() when it runs for the first time during the given system suspend-resume cycle if the state of the device has been saved already and the device is still in D0. Setting that flag will cause the next iterations of pci_pm_suspend_noirq() to set state_saved for pci_pm_resume_noirq(), so that it always restores the device state from the originally saved data, and avoid calling pci_prepare_to_sleep() for the device.
Fixes: 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle") Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Reviewed-by: Keith Busch keith.busch@intel.com Reviewed-by: Mika Westerberg mika.westerberg@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/pci-driver.c | 17 ++++++++++++++++- include/linux/pci.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 33f3f475e5c6..7430f993567f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -734,6 +734,8 @@ static int pci_pm_suspend(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+ pci_dev->skip_bus_pm = false; + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND);
@@ -827,7 +829,20 @@ static int pci_pm_suspend_noirq(struct device *dev) } }
- if (!pci_dev->state_saved) { + if (pci_dev->skip_bus_pm) { + /* + * The function is running for the second time in a row without + * going through full resume, which is possible only during + * suspend-to-idle in a spurious wakeup case. Moreover, the + * device was originally left in D0, so its power state should + * not be changed here and the device register values saved + * originally should be restored on resume again. + */ + pci_dev->state_saved = true; + } else if (pci_dev->state_saved) { + if (pci_dev->current_state == PCI_D0) + pci_dev->skip_bus_pm = true; + } else { pci_save_state(pci_dev); if (pci_power_manageable(pci_dev)) pci_prepare_to_sleep(pci_dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index b1f297f4b7b0..94853094b6ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -342,6 +342,7 @@ struct pci_dev { D3cold, not set for devices powered on/off by the corresponding bridge */ + unsigned int skip_bus_pm:1; /* Internal: Skip bus-level PM */ unsigned int ignore_hotplug:1; /* Ignore hotplug events */ unsigned int hotplug_user_indicators:1; /* SlotCtl indicators controlled exclusively by
From: Jan Kara jack@suse.cz
[ Upstream commit 33ec3e53e7b1869d7851e59e126bdb0fe0bd1982 ]
Loop module allows calling LOOP_SET_FD while there are other openers of the loop device. Even exclusive ones. This can lead to weird consequences such as kernel deadlocks like:
mount_bdev() lo_ioctl() udf_fill_super() udf_load_vrs() sb_set_blocksize() - sets desired block size B udf_tread() sb_bread() __bread_gfp(bdev, block, B) loop_set_fd() set_blocksize() - now __getblk_slow() indefinitely loops because B != bdev block size
Fix the problem by disallowing LOOP_SET_FD ioctl when there are exclusive openers of a loop device.
[Deliberately chosen not to CC stable as a user with privileges to trigger this race has other means of taking the system down and this has a potential of breaking some weird userspace setup]
Reported-and-tested-by: syzbot+10007d66ca02b08f0e60@syzkaller.appspotmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/block/loop.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f1e63eb7cbca..a443910f5d6f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -920,9 +920,20 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!file) goto out;
+ /* + * If we don't hold exclusive handle for the device, upgrade to it + * here to avoid changing device under exclusive owner. + */ + if (!(mode & FMODE_EXCL)) { + bdgrab(bdev); + error = blkdev_get(bdev, mode | FMODE_EXCL, loop_set_fd); + if (error) + goto out_putf; + } + error = mutex_lock_killable(&loop_ctl_mutex); if (error) - goto out_putf; + goto out_bdev;
error = -EBUSY; if (lo->lo_state != Lo_unbound) @@ -986,10 +997,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, mutex_unlock(&loop_ctl_mutex); if (partscan) loop_reread_partitions(lo, bdev); + if (!(mode & FMODE_EXCL)) + blkdev_put(bdev, mode | FMODE_EXCL); return 0;
out_unlock: mutex_unlock(&loop_ctl_mutex); +out_bdev: + if (!(mode & FMODE_EXCL)) + blkdev_put(bdev, mode | FMODE_EXCL); out_putf: fput(file); out:
On Sat 08-06-19 07:42:03, Sasha Levin wrote:
From: Jan Kara jack@suse.cz
[ Upstream commit 33ec3e53e7b1869d7851e59e126bdb0fe0bd1982 ]
The same as for 5.1 - please don't merge to stable.
Honza
From: Lucas Stach l.stach@pengutronix.de
[ Upstream commit 1396500d673bd027683a0609ff84dca7eb6ea2e7 ]
The devcoredump needs to operate on a stable state of the MMU while it is writing the MMU state to the coredump. The missing lock allowed both the userspace submit, as well as the GPU job finish paths to mutate the MMU state while a coredump is under way.
Fixes: a8c21a5451d8 (drm/etnaviv: add initial etnaviv DRM driver) Reported-by: David Jander david@protonic.nl Signed-off-by: Lucas Stach l.stach@pengutronix.de Tested-by: David Jander david@protonic.nl Reviewed-by: Philipp Zabel p.zabel@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/etnaviv/etnaviv_dump.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 9146e30e24a6..468dff2f7904 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -124,6 +124,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) return; etnaviv_dump_core = false;
+ mutex_lock(&gpu->mmu->lock); + mmu_size = etnaviv_iommu_dump_size(gpu->mmu);
/* We always dump registers, mmu, ring and end marker */ @@ -166,6 +168,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, PAGE_KERNEL); if (!iter.start) { + mutex_unlock(&gpu->mmu->lock); dev_warn(gpu->dev, "failed to allocate devcoredump file\n"); return; } @@ -233,6 +236,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) obj->base.size); }
+ mutex_unlock(&gpu->mmu->lock); + etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data);
dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
From: Igor Russkikh Igor.Russkikh@aquantia.com
[ Upstream commit 31bafc49a7736989e4c2d9f7280002c66536e590 ]
In case no other traffic is happening on the ring, full tx cleanup may not be completed. That may cause the socket buffer to overflow and tx traffic to get stuck until the next activity on the ring happens.
This is due to a logic error in the budget variable decrementor. The variable is compared with zero, and then post-decremented, causing it to become MAX_INT. The solution is to remove the decrementor from the `for` statement and rewrite it in a clear way.
Fixes: b647d3980948e ("net: aquantia: Add tx clean budget and valid budget handling logic") Signed-off-by: Igor Russkikh igor.russkikh@aquantia.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 6f3312350cac..b3c7994d73eb 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -139,10 +139,10 @@ void aq_ring_queue_stop(struct aq_ring_s *ring) bool aq_ring_tx_clean(struct aq_ring_s *self) { struct device *dev = aq_nic_get_dev(self->aq_nic); - unsigned int budget = AQ_CFG_TX_CLEAN_BUDGET; + unsigned int budget;
- for (; self->sw_head != self->hw_head && budget--; - self->sw_head = aq_ring_next_dx(self, self->sw_head)) { + for (budget = AQ_CFG_TX_CLEAN_BUDGET; + budget && self->sw_head != self->hw_head; budget--) { struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
if (likely(buff->is_mapped)) { @@ -167,6 +167,7 @@ bool aq_ring_tx_clean(struct aq_ring_s *self)
buff->pa = 0U; buff->eop_index = 0xffffU; + self->sw_head = aq_ring_next_dx(self, self->sw_head); }
return !!budget;
From: Dmitry Bogdanov dmitry.bogdanov@aquantia.com
[ Upstream commit eaeb3b7494ba9159323814a8ce8af06a9277d99b ]
The driver stops producing skbs on the ring if a packet with an FCS error was coalesced into an LRO session. The ring hangs forever.
That's a logical error in the driver's descriptor processing: when rx_stat indicates a MAC error, the next pointer and eop flags are not filled. This confuses the driver, so it waits for descriptor 0 to be filled by HW.
The solution is to fill the next pointer and eop flag even for packets with an FCS error.
Fixes: bab6de8fd180b ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Igor Russkikh igor.russkikh@aquantia.com Signed-off-by: Dmitry Bogdanov dmitry.bogdanov@aquantia.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 61 ++++++++++--------- 1 file changed, 32 insertions(+), 29 deletions(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 56363ff5c891..51cd1f98bcf0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -695,38 +695,41 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) { /* MAC error or DMA error */ buff->is_error = 1U; - } else { - if (self->aq_nic_cfg->is_rss) { - /* last 4 byte */ - u16 rss_type = rxd_wb->type & 0xFU; - - if (rss_type && rss_type < 0x8U) { - buff->is_hash_l4 = (rss_type == 0x4 || - rss_type == 0x5); - buff->rss_hash = rxd_wb->rss_hash; - } + } + if (self->aq_nic_cfg->is_rss) { + /* last 4 byte */ + u16 rss_type = rxd_wb->type & 0xFU; + + if (rss_type && rss_type < 0x8U) { + buff->is_hash_l4 = (rss_type == 0x4 || + rss_type == 0x5); + buff->rss_hash = rxd_wb->rss_hash; } + }
- if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { - buff->len = rxd_wb->pkt_len % - AQ_CFG_RX_FRAME_MAX; - buff->len = buff->len ? - buff->len : AQ_CFG_RX_FRAME_MAX; - buff->next = 0U; - buff->is_eop = 1U; + if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { + buff->len = rxd_wb->pkt_len % + AQ_CFG_RX_FRAME_MAX; + buff->len = buff->len ? + buff->len : AQ_CFG_RX_FRAME_MAX; + buff->next = 0U; + buff->is_eop = 1U; + } else { + buff->len = + rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ? + AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len; + + if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & + rxd_wb->status) { + /* LRO */ + buff->next = rxd_wb->next_desc_ptr; + ++ring->stats.rx.lro_packets; } else { - if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & - rxd_wb->status) { - /* LRO */ - buff->next = rxd_wb->next_desc_ptr; - ++ring->stats.rx.lro_packets; - } else { - /* jumbo */ - buff->next = - aq_ring_next_dx(ring, - ring->hw_head); - ++ring->stats.rx.jumbo_packets; - } + /* jumbo */ + buff->next = + aq_ring_next_dx(ring, + ring->hw_head); + ++ring->stats.rx.jumbo_packets; } } }
From: Yingjoe Chen yingjoe.chen@mediatek.com
[ Upstream commit a0692f0eef91354b62c2b4c94954536536be5425 ]
If I2C_M_RECV_LEN check failed, msgs[i].buf allocated by memdup_user will not be freed. Pump index up so it will be freed.
Fixes: 838bfa6049fb ("i2c-dev: Add support for I2C_M_RECV_LEN") Signed-off-by: Yingjoe Chen yingjoe.chen@mediatek.com Signed-off-by: Wolfram Sang wsa@the-dreams.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/i2c-dev.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index ccd76c71af09..cb07651f4b46 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -283,6 +283,7 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, msgs[i].len < 1 || msgs[i].buf[0] < 1 || msgs[i].len < msgs[i].buf[0] + I2C_SMBUS_BLOCK_MAX) { + i++; res = -EINVAL; break; }
From: Vadim Pasternak vadimp@mellanox.com
[ Upstream commit 13067ef73f337336e3149f5bb9f3fd05fe7f87a0 ]
Fix wrong order in probing routine initialization - field `base_addr` is used before it's initialized. Move the assignment of `priv->base_addr` to the beginning, prior to the call to mlxcpld_i2c_read_comm(). The wrong order caused the first read of the capability register to be executed at the wrong offset 0x0 instead of 0x2000. By chance it was a "good garbage" at 0x0 offset.
Fixes: 313ce648b5a4 ("i2c: mlxcpld: Add support for extended transaction length for i2c-mlxcpld") Signed-off-by: Vadim Pasternak vadimp@mellanox.com Signed-off-by: Wolfram Sang wsa@the-dreams.de Cc: stable@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-mlxcpld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 745ed43a22d6..2fd717d8dd30 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -503,6 +503,7 @@ static int mlxcpld_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv);
priv->dev = &pdev->dev; + priv->base_addr = MLXPLAT_CPLD_LPC_I2C_BASE_ADDR;
/* Register with i2c layer */ mlxcpld_i2c_adapter.timeout = usecs_to_jiffies(MLXCPLD_I2C_XFER_TO); @@ -518,7 +519,6 @@ static int mlxcpld_i2c_probe(struct platform_device *pdev) mlxcpld_i2c_adapter.nr = pdev->id; priv->adap = mlxcpld_i2c_adapter; priv->adap.dev.parent = &pdev->dev; - priv->base_addr = MLXPLAT_CPLD_LPC_I2C_BASE_ADDR; i2c_set_adapdata(&priv->adap, priv);
err = i2c_add_numbered_adapter(&priv->adap);
From: Bard Liao yung-chuan.liao@linux.intel.com
[ Upstream commit fa763f1b2858752e6150ffff46886a1b7faffc82 ]
We observed the same issue as reported by commit a8d7bde23e7130686b7662 ("ALSA: hda - Force polling mode on CFL for fixing codec communication"). We don't have a better solution. So apply the same workaround to CNL.
Signed-off-by: Bard Liao yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/pci/hda/hda_intel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 9bc8a7cb40ea..e895e2dd8764 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -378,6 +378,7 @@ enum {
#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) #define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) +#define IS_CNL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9dc8)
static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -1795,8 +1796,8 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, else chip->bdl_pos_adj = bdl_pos_adj[dev];
- /* Workaround for a communication error on CFL (bko#199007) */ - if (IS_CFL(pci)) + /* Workaround for a communication error on CFL (bko#199007) and CNL */ + if (IS_CFL(pci) || IS_CNL(pci)) chip->polling_mode = 1;
err = azx_bus_init(chip, model[dev], &pci_hda_io_ops);
From: Sahitya Tummala stummala@codeaurora.org
[ Upstream commit f6122ed2a4f9c9c1c073ddf6308d1b2ac10e0781 ]
In the vfs_statx() context, during path lookup, the dentry gets added to sd->s_dentry via configfs_attach_attr(). In the end, vfs_statx() kills the dentry by calling path_put(), which invokes configfs_d_iput(). Ideally, this dentry should be removed from sd->s_dentry, but it isn't if sd->s_count >= 3. As a result, sd->s_dentry holds a reference to a stale dentry pointer whose memory has already been freed. This results in a use-after-free when this stale sd->s_dentry is accessed later in the configfs_readdir() path.
This issue can be easily reproduced, by running the LTP test case - sh fs_racer_file_list.sh /config (https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/fs/ra...)
Fixes: 76ae281f6307 ('configfs: fix race between dentry put and lookup') Signed-off-by: Sahitya Tummala stummala@codeaurora.org Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- fs/configfs/dir.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 39843fa7e11b..073e788f7810 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -58,15 +58,13 @@ static void configfs_d_iput(struct dentry * dentry, if (sd) { /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - /* Coordinate with configfs_attach_attr where will increase - * sd->s_count and update sd->s_dentry to new allocated one. - * Only set sd->dentry to null when this dentry is the only - * sd owner. - * If not do so, configfs_d_iput may run just after - * configfs_attach_attr and set sd->s_dentry to null - * even it's still in use. + /* + * Set sd->s_dentry to null only when this dentry is the one + * that is going to be killed. Otherwise configfs_d_iput may + * run just after configfs_attach_attr and set sd->s_dentry to + * NULL even it's still in use. */ - if (atomic_read(&sd->s_count) <= 2) + if (sd->s_dentry == dentry) sd->s_dentry = NULL;
spin_unlock(&configfs_dirent_lock);
From: Shawn Landden shawn@git.icu
[ Upstream commit 97acec7df172cd1e450f81f5e293c0aa145a2797 ]
This strncat() is safe because the buffer was allocated with zalloc(), however gcc doesn't know that. Since the string always has 4 non-null bytes, just use memcpy() here.
CC /home/shawn/linux/tools/perf/util/data-convert-bt.o In file included from /usr/include/string.h:494, from /home/shawn/linux/tools/lib/traceevent/event-parse.h:27, from util/data-convert-bt.c:22: In function ‘strncat’, inlined from ‘string_set_value’ at util/data-convert-bt.c:274:4: /usr/include/powerpc64le-linux-gnu/bits/string_fortified.h:136:10: error: ‘__builtin_strncat’ output may be truncated copying 4 bytes from a string of length 4 [-Werror=stringop-truncation] 136 | return __builtin___strncat_chk (__dest, __src, __len, __bos (__dest)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Shawn Landden shawn@git.icu Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Namhyung Kim namhyung@kernel.org Cc: Wang Nan wangnan0@huawei.com LPU-Reference: 20190518183238.10954-1-shawn@git.icu Link: https://lkml.kernel.org/n/tip-289f1jice17ta7tr3tstm9jm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/data-convert-bt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index abd38abf1d91..24f2a87cf91d 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -271,7 +271,7 @@ static int string_set_value(struct bt_ctf_field *field, const char *string) if (i > 0) strncpy(buffer, string, i); } - strncat(buffer + p, numstr, 4); + memcpy(buffer + p, numstr, 4); p += 3; } }
From: Namhyung Kim namhyung@kernel.org
[ Upstream commit 6584140ba9e6762dd7ec73795243289b914f31f9 ]
It seems that the current code does not hold the namespace lock in thread__namespaces(). Without the lock it can see inconsistent results.
Signed-off-by: Namhyung Kim namhyung@kernel.org Cc: Hari Bathini hbathini@linux.vnet.ibm.com Cc: Jiri Olsa jolsa@redhat.com Cc: Krister Johansen kjlx@templeofstupid.com Link: http://lkml.kernel.org/r/20190522053250.207156-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/thread.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 2048d393ece6..56007a7e0b4d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -128,7 +128,7 @@ void thread__put(struct thread *thread) } }
-struct namespaces *thread__namespaces(const struct thread *thread) +static struct namespaces *__thread__namespaces(const struct thread *thread) { if (list_empty(&thread->namespaces_list)) return NULL; @@ -136,10 +136,21 @@ struct namespaces *thread__namespaces(const struct thread *thread) return list_first_entry(&thread->namespaces_list, struct namespaces, list); }
+struct namespaces *thread__namespaces(const struct thread *thread) +{ + struct namespaces *ns; + + down_read((struct rw_semaphore *)&thread->namespaces_lock); + ns = __thread__namespaces(thread); + up_read((struct rw_semaphore *)&thread->namespaces_lock); + + return ns; +} + static int __thread__set_namespaces(struct thread *thread, u64 timestamp, struct namespaces_event *event) { - struct namespaces *new, *curr = thread__namespaces(thread); + struct namespaces *new, *curr = __thread__namespaces(thread);
new = namespaces__new(event); if (!new)
From: Thomas Richter tmricht@linux.ibm.com
[ Upstream commit 6738028dd57df064b969d8392c943ef3b3ae705d ]
Commands 'perf record' and 'perf report' on a system without kernel debuginfo packages use /proc/kallsyms and /proc/modules to find addresses for kernel and module symbols. On x86 this works for root and non-root users.
On s390, when invoked as non-root user, many of the following warnings are shown and module symbols are missing:
proc/{kallsyms,modules} inconsistency while looking for "[sha1_s390]" module!
Command 'perf record' creates a list of module start addresses by parsing the output of /proc/modules and creates a PERF_RECORD_MMAP record for the kernel and each module. The following function call sequence is executed:
machine__create_kernel_maps machine__create_module modules__parse machine__create_module --> for each line in /proc/modules arch__fix_module_text_start
Function arch__fix_module_text_start() is s390 specific. It opens file /sys/module/<name>/sections/.text to extract the module's .text section start address. On s390 the module loader prepends a header before the first section, whereas on x86 the module's text section address is identical to the module's load address.
However module section files are root readable only. For non-root the read operation fails and machine__create_module() returns an error. Command perf record does not generate any PERF_RECORD_MMAP record for loaded modules. Later command perf report complains about missing module maps.
To fix this, function arch__fix_module_text_start() now always returns success. For root users there is no change; for non-root users the module's load address is used as the module's text start address (the prepended header then counts as part of the text section).
This enables non-root users to use module symbols and avoids the warning when perf report is executed.
Output before:
[tmricht@m83lp54 perf]$ ./perf report -D | fgrep MMAP 0 0x168 [0x50]: PERF_RECORD_MMAP ... x [kernel.kallsyms]_text
Output after:
[tmricht@m83lp54 perf]$ ./perf report -D | fgrep MMAP 0 0x168 [0x50]: PERF_RECORD_MMAP ... x [kernel.kallsyms]_text 0 0x1b8 [0x98]: PERF_RECORD_MMAP ... x /lib/modules/.../autofs4.ko.xz 0 0x250 [0xa8]: PERF_RECORD_MMAP ... x /lib/modules/.../sha_common.ko.xz 0 0x2f8 [0x98]: PERF_RECORD_MMAP ... x /lib/modules/.../des_generic.ko.xz
Signed-off-by: Thomas Richter tmricht@linux.ibm.com Reviewed-by: Hendrik Brueckner brueckner@linux.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20190522144601.50763-4-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/arch/s390/util/machine.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index 0b2054007314..a19690a17291 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -5,16 +5,19 @@ #include "util.h" #include "machine.h" #include "api/fs/fs.h" +#include "debug.h"
int arch__fix_module_text_start(u64 *start, const char *name) { + u64 m_start = *start; char path[PATH_MAX];
snprintf(path, PATH_MAX, "module/%.*s/sections/.text", (int)strlen(name) - 2, name + 1); - - if (sysfs__read_ull(path, (unsigned long long *)start) < 0) - return -1; + if (sysfs__read_ull(path, (unsigned long long *)start) < 0) { + pr_debug2("Using module %s start:%#lx\n", path, m_start); + *start = m_start; + }
return 0; }
From: Randy Dunlap rdunlap@infradead.org
[ Upstream commit 9a626c4a6326da4433a0d4d4a8a7d1571caf1ed3 ]
Fix build errors on ia64 when DISCONTIGMEM=y and NUMA=y by exporting paddr_to_nid().
Fixes these build errors:
ERROR: "paddr_to_nid" [sound/core/snd-pcm.ko] undefined! ERROR: "paddr_to_nid" [net/sunrpc/sunrpc.ko] undefined! ERROR: "paddr_to_nid" [fs/cifs/cifs.ko] undefined! ERROR: "paddr_to_nid" [drivers/video/fbdev/core/fb.ko] undefined! ERROR: "paddr_to_nid" [drivers/usb/mon/usbmon.ko] undefined! ERROR: "paddr_to_nid" [drivers/usb/core/usbcore.ko] undefined! ERROR: "paddr_to_nid" [drivers/md/raid1.ko] undefined! ERROR: "paddr_to_nid" [drivers/md/dm-mod.ko] undefined! ERROR: "paddr_to_nid" [drivers/md/dm-crypt.ko] undefined! ERROR: "paddr_to_nid" [drivers/md/dm-bufio.ko] undefined! ERROR: "paddr_to_nid" [drivers/ide/ide-core.ko] undefined! ERROR: "paddr_to_nid" [drivers/ide/ide-cd_mod.ko] undefined! ERROR: "paddr_to_nid" [drivers/gpu/drm/drm.ko] undefined! ERROR: "paddr_to_nid" [drivers/char/agp/agpgart.ko] undefined! ERROR: "paddr_to_nid" [drivers/block/nbd.ko] undefined! ERROR: "paddr_to_nid" [drivers/block/loop.ko] undefined! ERROR: "paddr_to_nid" [drivers/block/brd.ko] undefined! ERROR: "paddr_to_nid" [crypto/ccm.ko] undefined!
Reported-by: kbuild test robot lkp@intel.com Signed-off-by: Randy Dunlap rdunlap@infradead.org Cc: Tony Luck tony.luck@intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: linux-ia64@vger.kernel.org Signed-off-by: Tony Luck tony.luck@intel.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/ia64/mm/numa.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index aa19b7ac8222..476c7b4be378 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -49,6 +49,7 @@ paddr_to_nid(unsigned long paddr)
return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0); } +EXPORT_SYMBOL(paddr_to_nid);
#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA) /*
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit 41349672e3cbc2e8349831f21253509c3415aa2b ]
Fixes gcc '-Wunused-but-set-variable' warning:
drivers/xen/pvcalls-front.c: In function pvcalls_front_sendmsg: drivers/xen/pvcalls-front.c:543:25: warning: variable bedata set but not used [-Wunused-but-set-variable] drivers/xen/pvcalls-front.c: In function pvcalls_front_recvmsg: drivers/xen/pvcalls-front.c:638:25: warning: variable bedata set but not used [-Wunused-but-set-variable]
They have never been used since their introduction.
Signed-off-by: YueHaibing yuehaibing@huawei.com Reviewed-by: Juergen Gross jgross@suse.com Signed-off-by: Boris Ostrovsky boris.ostrovsky@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/xen/pvcalls-front.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 91da7e44d5d4..3a144eecb6a7 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -538,7 +538,6 @@ static int __write_ring(struct pvcalls_data_intf *intf, int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { - struct pvcalls_bedata *bedata; struct sock_mapping *map; int sent, tot_sent = 0; int count = 0, flags; @@ -550,7 +549,6 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg, map = pvcalls_enter_sock(sock); if (IS_ERR(map)) return PTR_ERR(map); - bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
mutex_lock(&map->active.out_mutex); if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) { @@ -633,7 +631,6 @@ static int __read_ring(struct pvcalls_data_intf *intf, int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct pvcalls_bedata *bedata; int ret; struct sock_mapping *map;
@@ -643,7 +640,6 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, map = pvcalls_enter_sock(sock); if (IS_ERR(map)) return PTR_ERR(map); - bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
mutex_lock(&map->active.in_mutex); if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
From: Ross Lagerwall ross.lagerwall@citrix.com
[ Upstream commit d10e0cc113c9e1b64b5c6e3db37b5c839794f3df ]
During a suspend/resume, the xenwatch thread waits for all outstanding xenstore requests and transactions to complete. This does not work correctly for transactions started by userspace because it waits for them to complete after freezing userspace threads which means the transactions have no way of completing, resulting in a deadlock. This is trivial to reproduce by running this script and then suspending the VM:
import pyxs, time c = pyxs.client.Client(xen_bus_path="/dev/xen/xenbus") c.connect() c.transaction() time.sleep(3600)
Even if this deadlock were resolved, misbehaving userspace should not prevent a VM from being migrated. So, instead of waiting for these transactions to complete before suspending, store the current generation id for each transaction when it is started. The global generation id is incremented during resume. If the caller commits the transaction and the generation id does not match the current generation id, return EAGAIN so that they try again. If the transaction was instead discarded, return OK since no changes were made anyway.
This only affects users of the xenbus file interface. In-kernel users of xenbus are assumed to be well-behaved and complete all transactions before freezing.
Signed-off-by: Ross Lagerwall ross.lagerwall@citrix.com Reviewed-by: Juergen Gross jgross@suse.com Signed-off-by: Boris Ostrovsky boris.ostrovsky@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/xen/xenbus/xenbus.h | 3 +++ drivers/xen/xenbus/xenbus_dev_frontend.c | 18 ++++++++++++++++++ drivers/xen/xenbus/xenbus_xs.c | 7 +++++-- 3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 092981171df1..d75a2385b37c 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -83,6 +83,7 @@ struct xb_req_data { int num_vecs; int err; enum xb_req_state state; + bool user_req; void (*cb)(struct xb_req_data *); void *par; }; @@ -133,4 +134,6 @@ void xenbus_ring_ops_init(void); int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par); void xenbus_dev_queue_reply(struct xb_req_data *req);
+extern unsigned int xb_dev_generation_id; + #endif diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 0782ff3c2273..39c63152a358 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -62,6 +62,8 @@
#include "xenbus.h"
+unsigned int xb_dev_generation_id; + /* * An element of a list of outstanding transactions, for which we're * still waiting a reply. @@ -69,6 +71,7 @@ struct xenbus_transaction_holder { struct list_head list; struct xenbus_transaction handle; + unsigned int generation_id; };
/* @@ -441,6 +444,7 @@ static int xenbus_write_transaction(unsigned msg_type, rc = -ENOMEM; goto out; } + trans->generation_id = xb_dev_generation_id; list_add(&trans->list, &u->transactions); } else if (msg->hdr.tx_id != 0 && !xenbus_get_transaction(u, msg->hdr.tx_id)) @@ -449,6 +453,20 @@ static int xenbus_write_transaction(unsigned msg_type, !(msg->hdr.len == 2 && (!strcmp(msg->body, "T") || !strcmp(msg->body, "F")))) return xenbus_command_reply(u, XS_ERROR, "EINVAL"); + else if (msg_type == XS_TRANSACTION_END) { + trans = xenbus_get_transaction(u, msg->hdr.tx_id); + if (trans && trans->generation_id != xb_dev_generation_id) { + list_del(&trans->list); + kfree(trans); + if (!strcmp(msg->body, "T")) + return xenbus_command_reply(u, XS_ERROR, + "EAGAIN"); + else + return xenbus_command_reply(u, + XS_TRANSACTION_END, + "OK"); + } + }
rc = xenbus_dev_request_and_reply(&msg->hdr, u); if (rc && trans) { diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 49a3874ae6bb..ddc18da61834 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -105,6 +105,7 @@ static void xs_suspend_enter(void)
static void xs_suspend_exit(void) { + xb_dev_generation_id++; spin_lock(&xs_state_lock); xs_suspend_active--; spin_unlock(&xs_state_lock); @@ -125,7 +126,7 @@ static uint32_t xs_request_enter(struct xb_req_data *req) spin_lock(&xs_state_lock); }
- if (req->type == XS_TRANSACTION_START) + if (req->type == XS_TRANSACTION_START && !req->user_req) xs_state_users++; xs_state_users++; rq_id = xs_request_id++; @@ -140,7 +141,7 @@ void xs_request_exit(struct xb_req_data *req) spin_lock(&xs_state_lock); xs_state_users--; if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) || - (req->type == XS_TRANSACTION_END && + (req->type == XS_TRANSACTION_END && !req->user_req && !WARN_ON_ONCE(req->msg.type == XS_ERROR && !strcmp(req->body, "ENOENT")))) xs_state_users--; @@ -286,6 +287,7 @@ int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par) req->num_vecs = 1; req->cb = xenbus_dev_queue_reply; req->par = par; + req->user_req = true;
xs_send(req, msg);
@@ -313,6 +315,7 @@ static void *xs_talkv(struct xenbus_transaction t, req->vec = iovec; req->num_vecs = num_vecs; req->cb = xs_wake_up; + req->user_req = false;
msg.req_id = 0; msg.tx_id = t.id;
From: Paul Mackerras paulus@ozlabs.org
[ Upstream commit 1659e27d2bc1ef47b6d031abe01b467f18cb72d9 ]
Currently the Book 3S KVM code uses kvm->lock to synchronize access to the kvm->arch.rtas_tokens list. Because this list is scanned inside kvmppc_rtas_hcall(), which is called with the vcpu mutex held, taking kvm->lock causes a lock inversion problem, which could lead to a deadlock.
To fix this, we add a new mutex, kvm->arch.rtas_token_lock, which nests inside the vcpu mutexes, and use that instead of kvm->lock when accessing the rtas token list.
This removes the lockdep_assert_held() in kvmppc_rtas_tokens_free(). At this point we don't hold the new mutex, but that is OK because kvmppc_rtas_tokens_free() is only called when the whole VM is being destroyed, and at that point nothing can be looking up a token in the list.
Signed-off-by: Paul Mackerras paulus@ozlabs.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/book3s.c | 1 + arch/powerpc/kvm/book3s_rtas.c | 14 ++++++-------- 3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bccc5051249e..2b6049e83970 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -299,6 +299,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + struct mutex rtas_token_lock; DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 87348e498c89..281f074581a3 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -840,6 +840,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) #ifdef CONFIG_PPC64 INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables); INIT_LIST_HEAD(&kvm->arch.rtas_tokens); + mutex_init(&kvm->arch.rtas_token_lock); #endif
return kvm->arch.kvm_ops->init_vm(kvm); diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 2d3b2b1cc272..8f2355138f80 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -146,7 +146,7 @@ static int rtas_token_undefine(struct kvm *kvm, char *name) { struct rtas_token_definition *d, *tmp;
- lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.rtas_token_lock);
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { if (rtas_name_matches(d->handler->name, name)) { @@ -167,7 +167,7 @@ static int rtas_token_define(struct kvm *kvm, char *name, u64 token) bool found; int i;
- lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.rtas_token_lock);
list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { if (d->token == token) @@ -206,14 +206,14 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT;
- mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.rtas_token_lock);
if (args.token) rc = rtas_token_define(kvm, args.name, args.token); else rc = rtas_token_undefine(kvm, args.name);
- mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.rtas_token_lock);
return rc; } @@ -245,7 +245,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) orig_rets = args.rets; args.rets = &args.args[be32_to_cpu(args.nargs)];
- mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { @@ -256,7 +256,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) } }
- mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.rtas_token_lock);
if (rc == 0) { args.rets = orig_rets; @@ -282,8 +282,6 @@ void kvmppc_rtas_tokens_free(struct kvm *kvm) { struct rtas_token_definition *d, *tmp;
- lockdep_assert_held(&kvm->lock); - list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { list_del(&d->list); kfree(d);
From: Paul Mackerras paulus@ozlabs.org
[ Upstream commit 5a3f49364c3ffa1107bd88f8292406e98c5d206c ]
Currently the HV KVM code takes the kvm->lock around calls to kvm_for_each_vcpu() and kvm_get_vcpu_by_id() (which can call kvm_for_each_vcpu() internally). However, that leads to a lock order inversion problem, because these are called in contexts where the vcpu mutex is held, but the vcpu mutexes nest within kvm->lock according to Documentation/virtual/kvm/locking.txt. Hence there is a possibility of deadlock.
To fix this, we simply don't take the kvm->lock mutex around these calls. This is safe because the implementations of kvm_for_each_vcpu() and kvm_get_vcpu_by_id() have been designed to be able to be called locklessly.
Signed-off-by: Paul Mackerras paulus@ozlabs.org Reviewed-by: Cédric Le Goater clg@kaod.org Signed-off-by: Paul Mackerras paulus@ozlabs.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/kvm/book3s_hv.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3e3a71594e63..083dcedba11c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -426,12 +426,7 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { - struct kvm_vcpu *ret; - - mutex_lock(&kvm->lock); - ret = kvm_get_vcpu_by_id(kvm, id); - mutex_unlock(&kvm->lock); - return ret; + return kvm_get_vcpu_by_id(kvm, id); }
static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) @@ -1309,7 +1304,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask;
- mutex_lock(&kvm->lock); spin_lock(&vc->lock); /* * If ILE (interrupt little-endian) has changed, update the @@ -1349,7 +1343,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); - mutex_unlock(&kvm->lock); }
static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
From: Sami Tolvanen samitolvanen@google.com
[ Upstream commit 8ef8f368ce72b5e17f7c1f1ef15c38dcfd0fef64 ]
Syscall wrappers in <asm/syscall_wrapper.h> use const struct pt_regs * as the argument type. Use const in syscall_fn_t as well to fix indirect call type mismatches with Control-Flow Integrity checking.
Signed-off-by: Sami Tolvanen samitolvanen@google.com Reviewed-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ad8be16a39c9..58102652bf9e 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -20,7 +20,7 @@ #include <linux/compat.h> #include <linux/err.h>
-typedef long (*syscall_fn_t)(struct pt_regs *regs); +typedef long (*syscall_fn_t)(const struct pt_regs *regs);
extern const syscall_fn_t sys_call_table[];
linux-stable-mirror@lists.linaro.org