From: Su Yanjun suyj.fnst@cn.fujitsu.com
[ Upstream commit dd9ee3444014e8f28c0eefc9fffc9ac9c5248c12 ]
Recently we run a network test over ipcomp virtual tunnel.We find that if a ipv4 packet needs fragment, then the peer can't receive it.
We deep into the code and find that when packet need fragment the smaller fragment will be encapsulated by ipip not ipcomp. So when the ipip packet goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code always set skb'dev to the last fragment's dev. After ipv4 defrag processing, when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV error.
This patch adds compatible support for the ipip process in ipcomp virtual tunnel.
Signed-off-by: Su Yanjun suyj.fnst@cn.fujitsu.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/ipv4/ip_vti.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c35e0aa57e66c..e224ca2ea1c06 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -75,6 +75,33 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, return 0; }
+static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -83,6 +110,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); }
+static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -409,6 +444,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, };
+static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -562,6 +603,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed;
+ msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -571,6 +619,8 @@ static int __init vti_init(void)
rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_tunnel_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed:
From: Florian Westphal fw@strlen.de
[ Upstream commit 35e6103861a3a970de6c84688c6e7a1f65b164ca ]
The check assumes that in transport mode, the first templates family must match the address family of the policy selector.
Syzkaller managed to build a template using MODE_ROUTEOPTIMIZATION, with ipv4-in-ipv6 chain, leading to following splat:
BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x1db/0x1854 Read of size 4 at addr ffff888063e57aa0 by task a.out/2050 xfrm_state_find+0x1db/0x1854 xfrm_tmpl_resolve+0x100/0x1d0 xfrm_resolve_and_create_bundle+0x108/0x1000 [..]
Problem is that addresses point into flowi4 struct, but xfrm_state_find treats them as being ipv6 because it uses templ->encap_family is used (AF_INET6 in case of reproducer) rather than family (AF_INET).
This patch inverts the logic: Enforce 'template family must match selector' EXCEPT for tunnel and BEET mode.
In BEET and Tunnel mode, xfrm_tmpl_resolve_one will have remote/local address pointers changed to point at the addresses found in the template, rather than the flowi ones, so no oob read will occur.
Reported-by: 3ntr0py1337@gmail.com Reported-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/xfrm/xfrm_user.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 017b1c91d58ea..05b72d45dda05 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1377,10 +1377,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family;
- if ((ut[i].mode == XFRM_MODE_TRANSPORT) && - (ut[i].family != prev_family)) - return -EINVAL; - + switch (ut[i].mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + break; + default: + if (ut[i].family != prev_family) + return -EINVAL; + break; + } if (ut[i].mode >= XFRM_MODE_MAX) return -EINVAL;
From: Stephane Eranian eranian@google.com
[ Upstream commit 1497e804d1a6e2bd9107ddf64b0310449f4673eb ]
This patch fixes an issue in cpumap.c when used with the TOPOLOGY header. In some configurations, some NUMA nodes may have no CPU (empty cpulist). Yet a cpumap map must be created otherwise perf abort with an error. This patch handles this case by creating a dummy map.
Before:
$ perf record -o - -e cycles noploop 2 | perf script -i - 0x6e8 [0x6c]: failed to process type: 80
After:
$ perf record -o - -e cycles noploop 2 | perf script -i - noploop for 2 seconds
Signed-off-by: Stephane Eranian eranian@google.com Acked-by: Jiri Olsa jolsa@kernel.org Cc: Andi Kleen ak@linux.intel.com Cc: Kan Liang kan.liang@linux.intel.com Cc: Peter Zijlstra peterz@infradead.org Link: http://lkml.kernel.org/r/1547885559-1657-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/cpumap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index c4e55b71010c6..2ccfeb78fd5da 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -121,7 +121,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map();
- if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out;
while (isdigit(*cpu_list)) { @@ -168,8 +173,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out:
From: ZhangXiaoxu zhangxiaoxu5@huawei.com
[ Upstream commit 53ab60baa1ac4f20b080a22c13b77b6373922fd7 ]
There is a UBSAN bug report as below: UBSAN: Undefined behaviour in net/netfilter/ipvs/ip_vs_ctl.c:2227:21 signed integer overflow: -2147483647 * 1000 cannot be represented in type 'int'
Reproduce program: #include <stdio.h> #include <sys/types.h> #include <sys/socket.h>
#define IPPROTO_IP 0 #define IPPROTO_RAW 255
#define IP_VS_BASE_CTL (64+1024+64) #define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10)
/* The argument to IP_VS_SO_GET_TIMEOUT */ struct ipvs_timeout_t { int tcp_timeout; int tcp_fin_timeout; int udp_timeout; };
int main() { int ret = -1; int sockfd = -1; struct ipvs_timeout_t to;
sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (sockfd == -1) { printf("socket init error\n"); return -1; }
to.tcp_timeout = -2147483647; to.tcp_fin_timeout = -2147483647; to.udp_timeout = -2147483647;
ret = setsockopt(sockfd, IPPROTO_IP, IP_VS_SO_SET_TIMEOUT, (char *)(&to), sizeof(to));
printf("setsockopt return %d\n", ret); return ret; }
Return -EINVAL if the timeout value is negative or max than 'INT_MAX / HZ'.
Signed-off-by: ZhangXiaoxu zhangxiaoxu5@huawei.com Acked-by: Simon Horman horms@verge.net.au Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/ipvs/ip_vs_ctl.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 9b1452e8e868f..444a0cb33e979 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2182,6 +2182,18 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) u->tcp_fin_timeout, u->udp_timeout);
+#ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
From: Suravee Suthikulpanit suravee.suthikulpanit@amd.com
[ Upstream commit 9825bd94e3a2baae1f4874767ae3a7d4c049720e ]
When a VM is terminated, the VFIO driver detaches all pass-through devices from VFIO domain by clearing domain id and page table root pointer from each device table entry (DTE), and then invalidates the DTE. Then, the VFIO driver unmap pages and invalidate IOMMU pages.
Currently, the IOMMU driver keeps track of which IOMMU and how many devices are attached to the domain. When invalidate IOMMU pages, the driver checks if the IOMMU is still attached to the domain before issuing the invalidate page command.
However, since VFIO has already detached all devices from the domain, the subsequent INVALIDATE_IOMMU_PAGES commands are being skipped as there is no IOMMU attached to the domain. This results in data corruption and could cause the PCI device to end up in indeterministic state.
Fix this by invalidate IOMMU pages when detach a device, and before decrementing the per-domain device reference counts.
Cc: Boris Ostrovsky boris.ostrovsky@oracle.com Suggested-by: Joerg Roedel joro@8bytes.org Co-developed-by: Brijesh Singh brijesh.singh@amd.com Signed-off-by: Brijesh Singh brijesh.singh@amd.com Signed-off-by: Suravee Suthikulpanit suravee.suthikulpanit@amd.com Fixes: 6de8ad9b9ee0 ('x86/amd-iommu: Make iommu_flush_pages aware of multiple IOMMUs') Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/amd_iommu.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 5cf388ad1555d..48a73c48876b9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2132,14 +2132,11 @@ static void do_attach(struct iommu_dev_data *dev_data,
static void do_detach(struct iommu_dev_data *dev_data) { + struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu;
iommu = amd_iommu_rlookup_table[dev_data->devid];
- /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -2147,6 +2144,16 @@ static void do_detach(struct iommu_dev_data *dev_data)
/* Flush the DTE entry */ device_flush_dte(dev_data); + + /* Flush IOTLB */ + domain_flush_tlb_pde(domain); + + /* Wait for the flushes to finish */ + domain_flush_complete(domain); + + /* decrease reference counters - needs to happen after the flushes */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; }
/*
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 32a7726c4f4aadfabdb82440d84f88a5a2c8fe13 ]
- add missing memory barriers to the secondary CPU synchronization spin loops; add comment to the matching memory barrier in the boot_secondary and __cpu_die functions; - use READ_ONCE/WRITE_ONCE to access cpu_start_id/cpu_start_ccount instead of reading/writing them directly; - re-initialize cpu_running every time before starting secondary CPU to flush possible previous CPU startup results.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/head.S | 5 ++++- arch/xtensa/kernel/smp.c | 34 +++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 64140998e1173..16bdb414784ad 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -281,12 +281,13 @@ should_never_return:
movi a2, cpu_start_ccount 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b movi a3, 0 s32i a3, a2, 0 - memw 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b wsr a3, ccount @@ -323,11 +324,13 @@ ENTRY(cpu_restart) rsr a0, prid neg a2, a0 movi a3, cpu_start_id + memw s32i a2, a3, 0 #if XCHAL_DCACHE_IS_WRITEBACK dhwbi a3, 0 #endif 1: + memw l32i a2, a3, 0 dhi a3, 0 bne a2, a0, 1b diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 4d02e38514f54..545144d1431d5 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -192,9 +192,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) int i;
#ifdef CONFIG_HOTPLUG_CPU - cpu_start_id = cpu; - system_flush_invalidate_dcache_range( - (unsigned long)&cpu_start_id, sizeof(cpu_start_id)); + WRITE_ONCE(cpu_start_id, cpu); + /* Pairs with the third memw in the cpu_restart */ + mb(); + system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id, + sizeof(cpu_start_id)); #endif smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1);
@@ -203,18 +205,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) ccount = get_ccount(); while (!ccount);
- cpu_start_ccount = ccount; + WRITE_ONCE(cpu_start_ccount, ccount);
- while (time_before(jiffies, timeout)) { + do { + /* + * Pairs with the first two memws in the + * .Lboot_secondary. + */ mb(); - if (!cpu_start_ccount) - break; - } + ccount = READ_ONCE(cpu_start_ccount); + } while (ccount && time_before(jiffies, timeout));
- if (cpu_start_ccount) { + if (ccount) { smp_call_function_single(0, mx_cpu_stop, - (void *)cpu, 1); - cpu_start_ccount = 0; + (void *)cpu, 1); + WRITE_ONCE(cpu_start_ccount, 0); return -EIO; } } @@ -234,6 +239,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n", __func__, cpu, idle, start_info.stack);
+ init_completion(&cpu_running); ret = boot_secondary(cpu, idle); if (ret == 0) { wait_for_completion_timeout(&cpu_running, @@ -295,8 +301,10 @@ void __cpu_die(unsigned int cpu) unsigned long timeout = jiffies + msecs_to_jiffies(1000); while (time_before(jiffies, timeout)) { system_invalidate_dcache_range((unsigned long)&cpu_start_id, - sizeof(cpu_start_id)); - if (cpu_start_id == -cpu) { + sizeof(cpu_start_id)); + /* Pairs with the second memw in the cpu_restart */ + mb(); + if (READ_ONCE(cpu_start_id) == -cpu) { platform_cpu_kill(cpu); return; }
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 306b38305c0f86de7f17c5b091a95451dcc93d7d ]
Secondary CPU reset vector overlaps part of the double exception handler code, resulting in weird crashes and hangups when running user code. Move exception vectors one page up so that they don't clash with the secondary CPU reset vector.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/configs/smp_lx200_defconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 22eeacba37ccd..199e05f85e892 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -35,6 +35,7 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set +CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug"
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 8b1c42cdd7181200dc1fff39dcb6ac1a3fac2c25 ]
Otherwise it is impossible to enable CPUs after booting with 'maxcpus' parameter.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 545144d1431d5..0e34c1ed4aa8f 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -80,7 +80,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned i;
- for (i = 0; i < max_cpus; ++i) + for_each_possible_cpu(i) set_cpu_present(i, true); }
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 25384ce5f9530def39421597b1457d9462df6455 ]
This fixes the following warning at boot when the kernel is booted on a board with more CPU cores than was configured in NR_CPUS:
smp_init_cpus: Core Count = 8 smp_init_cpus: Core Id = 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at include/linux/cpumask.h:121 smp_init_cpus+0x54/0x74 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-rc3-00015-g1459333f88a0 #124 Call Trace: __warn$part$3+0x6a/0x7c warn_slowpath_null+0x35/0x3c smp_init_cpus+0x54/0x74 setup_arch+0x1c0/0x1d0 start_kernel+0x44/0x310 _startup+0x107/0x107
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 0e34c1ed4aa8f..54bb8e0473a06 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -93,6 +93,11 @@ void __init smp_init_cpus(void) pr_info("%s: Core Count = %d\n", __func__, ncpus); pr_info("%s: Core Id = %d\n", __func__, core_id);
+ if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + for (i = 0; i < ncpus; ++i) set_cpu_possible(i, true); }
From: Tomonori Sakita tomonori.sakita@sord.co.jp
[ Upstream commit 6571ebce112a21ec9be68ef2f53b96fcd41fd81b ]
If fill_level was not zero and status was not BUSY, result of "tx_prod - tx_cons - inuse" might be zero. Subtracting 1 unconditionally results invalid negative return value on this case. Make sure not to return an negative value.
Signed-off-by: Tomonori Sakita tomonori.sakita@sord.co.jp Signed-off-by: Atsushi Nemoto atsushi.nemoto@sord.co.jp Reviewed-by: Dalon L Westergreen dalon.westergreen@linux.intel.com Acked-by: Thor Thayer thor.thayer@linux.intel.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/altera/altera_msgdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 0fb986ba32905..0ae723f753417 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -145,7 +145,8 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) & 0xffff;
if (inuse) { /* Tx FIFO is not empty */ - ready = priv->tx_prod - priv->tx_cons - inuse - 1; + ready = max_t(int, + priv->tx_prod - priv->tx_cons - inuse - 1, 0); } else { /* Check for buffered last packet */ status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status));
From: Yao Liu yotta.liu@ucloud.cn
[ Upstream commit 80ff00172407e0aad4b10b94ef0816fc3e7813cb ]
There is a NULL pointer dereference of dev_name in nfs_parse_devname()
The oops looks something like:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... RIP: 0010:nfs_fs_mount+0x3b6/0xc20 [nfs] ... Call Trace: ? ida_alloc_range+0x34b/0x3d0 ? nfs_clone_super+0x80/0x80 [nfs] ? nfs_free_parsed_mount_data+0x60/0x60 [nfs] mount_fs+0x52/0x170 ? __init_waitqueue_head+0x3b/0x50 vfs_kern_mount+0x6b/0x170 do_mount+0x216/0xdc0 ksys_mount+0x83/0xd0 __x64_sys_mount+0x25/0x30 do_syscall_64+0x65/0x220 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Fix this by adding a NULL check on dev_name
Signed-off-by: Yao Liu yotta.liu@ucloud.cn Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/nfs/super.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dbdc2d2f91cf5..1a81613dd0996 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1876,6 +1876,11 @@ static int nfs_parse_devname(const char *dev_name, size_t len; char *end;
+ if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']');
From: Ming Lu ming.lu@citrix.com
[ Upstream commit 5d8fc4a9f0eec20b6c07895022a6bea3fb6dfb38 ]
The issue to be fixed in this commit is when libfc found it received a invalid FLOGI response from FC switch, it would return without freeing the fc frame, which is just the skb data. This would cause memory leak if FC switch keeps sending invalid FLOGI responses.
This fix is just to make it execute `fc_frame_free(fp)` before returning from function `fc_lport_flogi_resp`.
Signed-off-by: Ming Lu ming.lu@citrix.com Reviewed-by: Hannes Reinecke hare@suse.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/libfc/fc_lport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index e01a29863c384..867fc036d6ef5 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1739,14 +1739,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_payload_op(fp) != ELS_LS_ACC) { FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; }
flp = fc_frame_payload_get(fp, sizeof(*flp)); if (!flp) { FC_LPORT_DBG(lport, "FLOGI bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; }
mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1756,7 +1756,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, " "lport->mfs:%hu\n", mfs, lport->mfs); fc_lport_error(lport, fp); - goto err; + goto out; }
if (mfs <= lport->mfs) {
From: Sinan Kaya okaya@kernel.org
[ Upstream commit 0ee4b5f801b73b83a9fb3921d725f2162fd4a2e5 ]
Add BACKLIGHT_LCD_SUPPORT for SAMSUNG_Q10 to fix the warning: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE.
SAMSUNG_Q10 selects BACKLIGHT_CLASS_DEVICE but BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_LCD_SUPPORT.
Copy BACKLIGHT_LCD_SUPPORT dependency into SAMSUNG_Q10 to fix:
WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - SAMSUNG_Q10 [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y]
Signed-off-by: Sinan Kaya okaya@kernel.org Acked-by: Andy Shevchenko andy.shevchenko@gmail.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index a2eabe6ff9ada..99913af2338c1 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -806,6 +806,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" depends on ACPI + depends on BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10
From: Ronnie Sahlberg lsahlber@redhat.com
[ Upstream commit 58d15ed1203f4d858c339ea4d7dafa94bd2a56d3 ]
The size of the fixed part of the create response is 88 bytes not 56.
Signed-off-by: Ronnie Sahlberg lsahlber@redhat.com Signed-off-by: Steve French stfrench@microsoft.com Reviewed-by: Pavel Shilovsky pshilov@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 05c322d9e1e74..c68c31c9fedf5 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -82,8 +82,8 @@
#define NUMBER_OF_SMB2_COMMANDS 0x0013
-/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ -#define MAX_SMB2_HDR_SIZE 0x00b0 +/* 52 transform hdr + 64 hdr + 88 create rsp */ +#define MAX_SMB2_HDR_SIZE 204
#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
From: Kairui Song kasong@redhat.com
[ Upstream commit 2aa958c99c7fd3162b089a1a56a34a0cdb778de1 ]
Kexec-ing a kernel with "efi=noruntime" on the first kernel's command line causes the following null pointer dereference:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] Call Trace: efi_runtime_map_copy+0x28/0x30 bzImage64_load+0x688/0x872 arch_kexec_kernel_image_load+0x6d/0x70 kimage_file_alloc_init+0x13e/0x220 __x64_sys_kexec_file_load+0x144/0x290 do_syscall_64+0x55/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Just skip the EFI info setup if EFI runtime services are not enabled.
[ bp: Massage commit message. ]
Suggested-by: Dave Young dyoung@redhat.com Signed-off-by: Kairui Song kasong@redhat.com Signed-off-by: Borislav Petkov bp@suse.de Acked-by: Dave Young dyoung@redhat.com Cc: AKASHI Takahiro takahiro.akashi@linaro.org Cc: Andrew Morton akpm@linux-foundation.org Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: bhe@redhat.com Cc: David Howells dhowells@redhat.com Cc: erik.schmauss@intel.com Cc: fanc.fnst@cn.fujitsu.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: kexec@lists.infradead.org Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: Philipp Rudo prudo@linux.vnet.ibm.com Cc: rafael.j.wysocki@intel.com Cc: robert.moore@intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: x86-ml x86@kernel.org Cc: Yannik Sembritzki yannik@sembritzki.me Link: https://lkml.kernel.org/r/20190118111310.29589-2-kasong@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/kexec-bzimage64.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index ca05f86481aac..3e22751382c38 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, struct efi_info *current_ei = &boot_params.efi_info; struct efi_info *ei = ¶ms->efi_info;
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + if (!current_ei->efi_memmap_size) return 0;
From: Michal Hocko mhocko@suse.com
[ Upstream commit efad4e475c312456edb3c789d0996d12ed744c13 ]
Patch series "mm, memory_hotplug: fix uninitialized pages fallouts", v2.
Mikhail Zaslonko has posted fixes for the two bugs quite some time ago [1]. I have pushed back on those fixes because I believed that it is much better to plug the problem at the initialization time rather than play whack-a-mole all over the hotplug code and find all the places which expect the full memory section to be initialized.
We have ended up with commit 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") merged and cause a regression [2][3]. The reason is that there might be memory layouts when two NUMA nodes share the same memory section so the merged fix is simply incorrect.
In order to plug this hole we really have to be zone range aware in those handlers. I have split up the original patch into two. One is unchanged (patch 2) and I took a different approach for `removable' crash.
[1] http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com [2] https://bugzilla.redhat.com/show_bug.cgi?id=1666948 [3] http://lkml.kernel.org/r/20190125163938.GA20411@dhcp22.suse.cz
This patch (of 2):
Mikhail has reported the following VM_BUG_ON triggered when reading sysfs removable state of a memory block:
page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: is_mem_section_removable+0xb4/0x190 show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops
The reason is that the memory block spans the zone boundary and we are stumbling over an unitialized struct page. Fix this by enforcing zone range in is_mem_section_removable so that we never run away from a zone.
Link: http://lkml.kernel.org/r/20190128144506.15603-2-mhocko@kernel.org Signed-off-by: Michal Hocko mhocko@suse.com Reported-by: Mikhail Zaslonko zaslonko@linux.ibm.com Debugged-by: Mikhail Zaslonko zaslonko@linux.ibm.com Tested-by: Gerald Schaefer gerald.schaefer@de.ibm.com Tested-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Reviewed-by: Oscar Salvador osalvador@suse.de Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/memory_hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 05014e89efaec..3fb2067c36a4a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1321,7 +1321,8 @@ static struct page *next_active_pageblock(struct page *page) int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); + struct page *end_page = pfn_to_page(end_pfn);
/* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) {
From: Pan Bian bianpan2016@163.com
[ Upstream commit 63ce5f552beb9bdb41546b3a26c4374758b21815 ]
autofs_expire_run() calls dput(dentry) to drop the reference count of dentry. However, dentry is read via autofs_dentry_ino(dentry) after that. This may result in a use-free-bug. The patch drops the reference count of dentry only when it is never used.
Link: http://lkml.kernel.org/r/154725122396.11260.16053424107144453867.stgit@pluto... Signed-off-by: Pan Bian bianpan2016@163.com Signed-off-by: Ian Kent raven@themaw.net Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/autofs4/expire.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 513b8e5d151cb..4cd4b20c69dbf 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -570,7 +570,6 @@ int autofs4_expire_run(struct super_block *sb, pkt.len = dentry->d_name.len; memcpy(pkt.name, dentry->d_name.name, pkt.len); pkt.name[pkt.len] = '\0'; - dput(dentry);
if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) ret = -EFAULT; @@ -583,6 +582,8 @@ int autofs4_expire_run(struct super_block *sb, complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock);
+ dput(dentry); + return ret; }
From: Ian Kent raven@themaw.net
[ Upstream commit f585b283e3f025754c45bbe7533fc6e5c4643700 ]
In autofs_fill_super() on error of get inode/make root dentry the return should be ENOMEM as this is the only failure case of the called functions.
Link: http://lkml.kernel.org/r/154725123240.11260.796773942606871359.stgit@pluto-t... Signed-off-by: Ian Kent raven@themaw.net Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/autofs4/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 1c55388ae633c..512f70fade243 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -256,8 +256,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode = autofs4_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); - if (!root) + if (!root) { + ret = -ENOMEM; goto fail_ino; + } pipe = NULL;
root->d_fsdata = ino;
linux-stable-mirror@lists.linaro.org