From: Su Yanjun suyj.fnst@cn.fujitsu.com
[ Upstream commit dd9ee3444014e8f28c0eefc9fffc9ac9c5248c12 ]
Recently we run a network test over ipcomp virtual tunnel.We find that if a ipv4 packet needs fragment, then the peer can't receive it.
We deep into the code and find that when packet need fragment the smaller fragment will be encapsulated by ipip not ipcomp. So when the ipip packet goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code always set skb'dev to the last fragment's dev. After ipv4 defrag processing, when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV error.
This patch adds compatible support for the ipip process in ipcomp virtual tunnel.
Signed-off-by: Su Yanjun suyj.fnst@cn.fujitsu.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/ipv4/ip_vti.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 00d4371d45736..306603a7f3514 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -74,6 +74,33 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, return 0; }
+static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); }
+static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -439,6 +474,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, };
+static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -607,6 +648,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed;
+ msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -616,6 +664,8 @@ static int __init vti_init(void)
rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_tunnel_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed:
From: Florian Westphal fw@strlen.de
[ Upstream commit 35e6103861a3a970de6c84688c6e7a1f65b164ca ]
The check assumes that in transport mode, the first templates family must match the address family of the policy selector.
Syzkaller managed to build a template using MODE_ROUTEOPTIMIZATION, with ipv4-in-ipv6 chain, leading to following splat:
BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x1db/0x1854 Read of size 4 at addr ffff888063e57aa0 by task a.out/2050 xfrm_state_find+0x1db/0x1854 xfrm_tmpl_resolve+0x100/0x1d0 xfrm_resolve_and_create_bundle+0x108/0x1000 [..]
Problem is that addresses point into flowi4 struct, but xfrm_state_find treats them as being ipv6 because it uses templ->encap_family is used (AF_INET6 in case of reproducer) rather than family (AF_INET).
This patch inverts the logic: Enforce 'template family must match selector' EXCEPT for tunnel and BEET mode.
In BEET and Tunnel mode, xfrm_tmpl_resolve_one will have remote/local address pointers changed to point at the addresses found in the template, rather than the flowi ones, so no oob read will occur.
Reported-by: 3ntr0py1337@gmail.com Reported-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/xfrm/xfrm_user.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4e8319766f2bb..9ff9255d2191b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1445,10 +1445,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family;
- if ((ut[i].mode == XFRM_MODE_TRANSPORT) && - (ut[i].family != prev_family)) - return -EINVAL; - + switch (ut[i].mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + break; + default: + if (ut[i].family != prev_family) + return -EINVAL; + break; + } if (ut[i].mode >= XFRM_MODE_MAX) return -EINVAL;
From: Stephane Eranian eranian@google.com
[ Upstream commit 1a51c5da5acc6c188c917ba572eebac5f8793432 ]
The perf_proc_update_handler() handles /proc/sys/kernel/perf_event_max_sample_rate syctl variable. When the PMU IRQ handler timing monitoring is disabled, i.e, when /proc/sys/kernel/perf_cpu_time_max_percent is equal to 0 or 100, then no modification to sysctl_perf_event_sample_rate is allowed to prevent possible hang from wrong values.
The problem is that the test to prevent modification is made after the sysctl variable is modified in perf_proc_update_handler().
You get an error:
$ echo 10001 >/proc/sys/kernel/perf_event_max_sample_rate echo: write error: invalid argument
But the value is still modified causing all sorts of inconsistencies:
$ cat /proc/sys/kernel/perf_event_max_sample_rate 10001
This patch fixes the problem by moving the parsing of the value after the test.
Committer testing:
# echo 100 > /proc/sys/kernel/perf_cpu_time_max_percent # echo 10001 > /proc/sys/kernel/perf_event_max_sample_rate -bash: echo: write error: Invalid argument # cat /proc/sys/kernel/perf_event_max_sample_rate 10001 #
Signed-off-by: Stephane Eranian eranian@google.com Reviewed-by: Andi Kleen ak@linux.intel.com Reviewed-by: Jiri Olsa jolsa@kernel.org Tested-by: Arnaldo Carvalho de Melo acme@redhat.com Cc: Kan Liang kan.liang@linux.intel.com Cc: Peter Zijlstra peterz@infradead.org Link: http://lkml.kernel.org/r/1547169436-6266-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/events/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c index 991af683ef9e8..5946a2ea5c46b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - - if (ret || !write) - return ret; - + int ret; + int perf_cpu = sysctl_perf_cpu_time_max_percent; /* * If throttling is disabled don't allow the write: */ - if (sysctl_perf_cpu_time_max_percent == 100 || - sysctl_perf_cpu_time_max_percent == 0) + if (write && (perf_cpu == 100 || perf_cpu == 0)) return -EINVAL;
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits();
From: Stephane Eranian eranian@google.com
[ Upstream commit 1497e804d1a6e2bd9107ddf64b0310449f4673eb ]
This patch fixes an issue in cpumap.c when used with the TOPOLOGY header. In some configurations, some NUMA nodes may have no CPU (empty cpulist). Yet a cpumap map must be created otherwise perf abort with an error. This patch handles this case by creating a dummy map.
Before:
$ perf record -o - -e cycles noploop 2 | perf script -i - 0x6e8 [0x6c]: failed to process type: 80
After:
$ perf record -o - -e cycles noploop 2 | perf script -i - noploop for 2 seconds
Signed-off-by: Stephane Eranian eranian@google.com Acked-by: Jiri Olsa jolsa@kernel.org Cc: Andi Kleen ak@linux.intel.com Cc: Kan Liang kan.liang@linux.intel.com Cc: Peter Zijlstra peterz@infradead.org Link: http://lkml.kernel.org/r/1547885559-1657-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/cpumap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 1ccbd33420694..383674f448fcd 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map();
- if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out;
while (isdigit(*cpu_list)) { @@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out:
From: Brian Welty brian.welty@intel.com
[ Upstream commit 904bba211acc2112fdf866e5a2bc6cd9ecd0de1b ]
The work completion length for a receiving a UD send with immediate is short by 4 bytes causing application using this opcode to fail.
The UD receive logic incorrectly subtracts 4 bytes for immediate value. These bytes are already included in header length and are used to calculate header/payload split, so the result is these 4 bytes are subtracted twice, once when the header length subtracted from the overall length and once again in the UD opcode specific path.
Remove the extra subtraction when handling the opcode.
Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Michael J. Ruhl michael.j.ruhl@intel.com Signed-off-by: Brian Welty brian.welty@intel.com Signed-off-by: Mike Marciniszyn mike.marciniszyn@intel.com Signed-off-by: Dennis Dalessandro dennis.dalessandro@intel.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/hfi1/ud.c | 1 - drivers/infiniband/hw/qib/qib_ud.c | 1 - 2 files changed, 2 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 37abd150fad39..74aff88c593dd 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -954,7 +954,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index be4907453ac4d..5ef144e4a4cbf 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -515,7 +515,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0;
From: Jerry Snitselaar jsnitsel@redhat.com
[ Upstream commit 51d8838d66d3249508940d8f59b07701f2129723 ]
In the error path of map_sg, free_iova_fast is being called with address instead of the pfn. This results in a bad value getting into the rcache, and can result in hitting a BUG_ON when iova_magazine_free_pfns is called.
Cc: Joerg Roedel joro@8bytes.org Cc: Suravee Suthikulpanit suravee.suthikulpanit@amd.com Signed-off-by: Jerry Snitselaar jsnitsel@redhat.com Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index efa6cd2500b93..2dc9a71cb54f8 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2559,7 +2559,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, }
out_free_iova: - free_iova_fast(&dma_dom->iovad, address, npages); + free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages);
out_err: return 0;
From: Jerry Snitselaar jsnitsel@redhat.com
[ Upstream commit f1724c0883bb0ce93b8dcb94b53dcca3b75ac9a7 ]
In the error path of map_sg there is an incorrect if condition for breaking out of the loop that searches the scatterlist for mapped pages to unmap. Instead of breaking out of the loop once all the pages that were mapped have been unmapped, it will break out of the loop after it has unmapped 1 page. Fix the condition, so it breaks out of the loop only after all the mapped pages have been unmapped.
Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Cc: Joerg Roedel joro@8bytes.org Signed-off-by: Jerry Snitselaar jsnitsel@redhat.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2dc9a71cb54f8..b418a859577ff 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2553,7 +2553,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, bus_addr = address + s->dma_address + (j << PAGE_SHIFT); iommu_unmap_page(domain, bus_addr, PAGE_SIZE);
- if (--mapped_pages) + if (--mapped_pages == 0) goto out_free_iova; } }
From: ZhangXiaoxu zhangxiaoxu5@huawei.com
[ Upstream commit 53ab60baa1ac4f20b080a22c13b77b6373922fd7 ]
There is a UBSAN bug report as below: UBSAN: Undefined behaviour in net/netfilter/ipvs/ip_vs_ctl.c:2227:21 signed integer overflow: -2147483647 * 1000 cannot be represented in type 'int'
Reproduce program: #include <stdio.h> #include <sys/types.h> #include <sys/socket.h>
#define IPPROTO_IP 0 #define IPPROTO_RAW 255
#define IP_VS_BASE_CTL (64+1024+64) #define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10)
/* The argument to IP_VS_SO_GET_TIMEOUT */ struct ipvs_timeout_t { int tcp_timeout; int tcp_fin_timeout; int udp_timeout; };
int main() { int ret = -1; int sockfd = -1; struct ipvs_timeout_t to;
sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (sockfd == -1) { printf("socket init error\n"); return -1; }
to.tcp_timeout = -2147483647; to.tcp_fin_timeout = -2147483647; to.udp_timeout = -2147483647;
ret = setsockopt(sockfd, IPPROTO_IP, IP_VS_SO_SET_TIMEOUT, (char *)(&to), sizeof(to));
printf("setsockopt return %d\n", ret); return ret; }
Return -EINVAL if the timeout value is negative or max than 'INT_MAX / HZ'.
Signed-off-by: ZhangXiaoxu zhangxiaoxu5@huawei.com Acked-by: Simon Horman horms@verge.net.au Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/ipvs/ip_vs_ctl.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2f45c3ce77ef2..dff4ead3d117c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2252,6 +2252,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user u->tcp_fin_timeout, u->udp_timeout);
+#ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
From: Suravee Suthikulpanit suravee.suthikulpanit@amd.com
[ Upstream commit 9825bd94e3a2baae1f4874767ae3a7d4c049720e ]
When a VM is terminated, the VFIO driver detaches all pass-through devices from VFIO domain by clearing domain id and page table root pointer from each device table entry (DTE), and then invalidates the DTE. Then, the VFIO driver unmap pages and invalidate IOMMU pages.
Currently, the IOMMU driver keeps track of which IOMMU and how many devices are attached to the domain. When invalidate IOMMU pages, the driver checks if the IOMMU is still attached to the domain before issuing the invalidate page command.
However, since VFIO has already detached all devices from the domain, the subsequent INVALIDATE_IOMMU_PAGES commands are being skipped as there is no IOMMU attached to the domain. This results in data corruption and could cause the PCI device to end up in indeterministic state.
Fix this by invalidate IOMMU pages when detach a device, and before decrementing the per-domain device reference counts.
Cc: Boris Ostrovsky boris.ostrovsky@oracle.com Suggested-by: Joerg Roedel joro@8bytes.org Co-developed-by: Brijesh Singh brijesh.singh@amd.com Signed-off-by: Brijesh Singh brijesh.singh@amd.com Signed-off-by: Suravee Suthikulpanit suravee.suthikulpanit@amd.com Fixes: 6de8ad9b9ee0 ('x86/amd-iommu: Make iommu_flush_pages aware of multiple IOMMUs') Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/amd_iommu.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b418a859577ff..6f7587ef832e6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1912,6 +1912,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
static void do_detach(struct iommu_dev_data *dev_data) { + struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; u16 alias;
@@ -1927,10 +1928,6 @@ static void do_detach(struct iommu_dev_data *dev_data) iommu = amd_iommu_rlookup_table[dev_data->devid]; alias = dev_data->alias;
- /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -1940,6 +1937,16 @@ static void do_detach(struct iommu_dev_data *dev_data)
/* Flush the DTE entry */ device_flush_dte(dev_data); + + /* Flush IOTLB */ + domain_flush_tlb_pde(domain); + + /* Wait for the flushes to finish */ + domain_flush_complete(domain); + + /* decrease reference counters - needs to happen after the flushes */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; }
/*
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 4fe8713b873fc881284722ce4ac47995de7cf62c ]
ccount_timer_shutdown is called from the atomic context in the secondary_start_kernel, resulting in the following BUG:
BUG: sleeping function called from invalid context in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1 Preemption disabled at: secondary_start_kernel+0xa1/0x130 Call Trace: ___might_sleep+0xe7/0xfc __might_sleep+0x41/0x44 synchronize_irq+0x24/0x64 disable_irq+0x11/0x14 ccount_timer_shutdown+0x12/0x20 clockevents_switch_state+0x82/0xb4 clockevents_exchange_device+0x54/0x60 tick_check_new_device+0x46/0x70 clockevents_register_device+0x8c/0xc8 clockevents_config_and_register+0x1d/0x2c local_timer_setup+0x75/0x7c secondary_start_kernel+0xb4/0x130 should_never_return+0x32/0x35
Use disable_irq_nosync instead of disable_irq to avoid it. This is safe because the ccount timer IRQ is per-CPU, and once IRQ is masked the ISR will not be called.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index fd524a54d2ab5..378186b5eb401 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -89,7 +89,7 @@ static int ccount_timer_shutdown(struct clock_event_device *evt) container_of(evt, struct ccount_timer, evt);
if (timer->irq_enabled) { - disable_irq(evt->irq); + disable_irq_nosync(evt->irq); timer->irq_enabled = 0; } return 0;
From: Colin Ian King colin.king@canonical.com
[ Upstream commit 2b531b6137834a55857a337ac17510d6436b6fbb ]
The cpu-hotplug test assumes that we can offline the maximum CPU as described by /sys/devices/system/cpu/offline. However, in the case where the number of CPUs exceeds like kernel configuration then the offline count can be greater than the present count and we end up trying to test the offlining of a CPU that is not available to offline. Fix this by testing the maximum present CPU instead.
Also, the test currently offlines the CPU and does not online it, so fix this by onlining the CPU after the test.
Fixes: d89dffa976bc ("fault-injection: add selftests for cpu and memory hotplug") Signed-off-by: Colin Ian King colin.king@canonical.com Signed-off-by: Shuah Khan shuah@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../selftests/cpu-hotplug/cpu-on-off-test.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index f3a8933c12755..49ccd22933438 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -35,6 +35,10 @@ prerequisite() exit 0 fi
+ present_cpus=`cat $SYSFS/devices/system/cpu/present` + present_max=${present_cpus##*-} + echo "present_cpus = $present_cpus present_max = $present_max" + echo -e "\t Cpus in online state: $online_cpus"
offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -149,6 +153,8 @@ online_cpus=0 online_max=0 offline_cpus=0 offline_max=0 +present_cpus=0 +present_max=0
while getopts e:ahp: opt; do case $opt in @@ -188,9 +194,10 @@ if [ $allcpus -eq 0 ]; then online_cpu_expect_success $online_max
if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max + echo -e "\t offline to online to offline: cpu $present_max" + online_cpu_expect_success $present_max + offline_cpu_expect_success $present_max + online_cpu $present_max fi exit 0 else
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 32a7726c4f4aadfabdb82440d84f88a5a2c8fe13 ]
- add missing memory barriers to the secondary CPU synchronization spin loops; add comment to the matching memory barrier in the boot_secondary and __cpu_die functions; - use READ_ONCE/WRITE_ONCE to access cpu_start_id/cpu_start_ccount instead of reading/writing them directly; - re-initialize cpu_running every time before starting secondary CPU to flush possible previous CPU startup results.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/head.S | 5 ++++- arch/xtensa/kernel/smp.c | 34 +++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 27c8e07ace43f..29f445b410b37 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -281,12 +281,13 @@ should_never_return:
movi a2, cpu_start_ccount 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b movi a3, 0 s32i a3, a2, 0 - memw 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b wsr a3, ccount @@ -323,11 +324,13 @@ ENTRY(cpu_restart) rsr a0, prid neg a2, a0 movi a3, cpu_start_id + memw s32i a2, a3, 0 #if XCHAL_DCACHE_IS_WRITEBACK dhwbi a3, 0 #endif 1: + memw l32i a2, a3, 0 dhi a3, 0 bne a2, a0, 1b diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 932d64689bacb..c9fc2c4f71b3b 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -195,9 +195,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) int i;
#ifdef CONFIG_HOTPLUG_CPU - cpu_start_id = cpu; - system_flush_invalidate_dcache_range( - (unsigned long)&cpu_start_id, sizeof(cpu_start_id)); + WRITE_ONCE(cpu_start_id, cpu); + /* Pairs with the third memw in the cpu_restart */ + mb(); + system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id, + sizeof(cpu_start_id)); #endif smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1);
@@ -206,18 +208,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) ccount = get_ccount(); while (!ccount);
- cpu_start_ccount = ccount; + WRITE_ONCE(cpu_start_ccount, ccount);
- while (time_before(jiffies, timeout)) { + do { + /* + * Pairs with the first two memws in the + * .Lboot_secondary. + */ mb(); - if (!cpu_start_ccount) - break; - } + ccount = READ_ONCE(cpu_start_ccount); + } while (ccount && time_before(jiffies, timeout));
- if (cpu_start_ccount) { + if (ccount) { smp_call_function_single(0, mx_cpu_stop, - (void *)cpu, 1); - cpu_start_ccount = 0; + (void *)cpu, 1); + WRITE_ONCE(cpu_start_ccount, 0); return -EIO; } } @@ -237,6 +242,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n", __func__, cpu, idle, start_info.stack);
+ init_completion(&cpu_running); ret = boot_secondary(cpu, idle); if (ret == 0) { wait_for_completion_timeout(&cpu_running, @@ -298,8 +304,10 @@ void __cpu_die(unsigned int cpu) unsigned long timeout = jiffies + msecs_to_jiffies(1000); while (time_before(jiffies, timeout)) { system_invalidate_dcache_range((unsigned long)&cpu_start_id, - sizeof(cpu_start_id)); - if (cpu_start_id == -cpu) { + sizeof(cpu_start_id)); + /* Pairs with the second memw in the cpu_restart */ + mb(); + if (READ_ONCE(cpu_start_id) == -cpu) { platform_cpu_kill(cpu); return; }
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 306b38305c0f86de7f17c5b091a95451dcc93d7d ]
Secondary CPU reset vector overlaps part of the double exception handler code, resulting in weird crashes and hangups when running user code. Move exception vectors one page up so that they don't clash with the secondary CPU reset vector.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/configs/smp_lx200_defconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 14e3ca353ac8a..5035b86a2e494 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -34,6 +34,7 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set +CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0"
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 8b1c42cdd7181200dc1fff39dcb6ac1a3fac2c25 ]
Otherwise it is impossible to enable CPUs after booting with 'maxcpus' parameter.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index c9fc2c4f71b3b..80be6449c497e 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -83,7 +83,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned i;
- for (i = 0; i < max_cpus; ++i) + for_each_possible_cpu(i) set_cpu_present(i, true); }
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit 25384ce5f9530def39421597b1457d9462df6455 ]
This fixes the following warning at boot when the kernel is booted on a board with more CPU cores than was configured in NR_CPUS:
smp_init_cpus: Core Count = 8 smp_init_cpus: Core Id = 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at include/linux/cpumask.h:121 smp_init_cpus+0x54/0x74 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-rc3-00015-g1459333f88a0 #124 Call Trace: __warn$part$3+0x6a/0x7c warn_slowpath_null+0x35/0x3c smp_init_cpus+0x54/0x74 setup_arch+0x1c0/0x1d0 start_kernel+0x44/0x310 _startup+0x107/0x107
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 80be6449c497e..be1f280c322cd 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -96,6 +96,11 @@ void __init smp_init_cpus(void) pr_info("%s: Core Count = %d\n", __func__, ncpus); pr_info("%s: Core Id = %d\n", __func__, core_id);
+ if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + for (i = 0; i < ncpus; ++i) set_cpu_possible(i, true); }
From: Tomonori Sakita tomonori.sakita@sord.co.jp
[ Upstream commit 6571ebce112a21ec9be68ef2f53b96fcd41fd81b ]
If fill_level was not zero and status was not BUSY, result of "tx_prod - tx_cons - inuse" might be zero. Subtracting 1 unconditionally results invalid negative return value on this case. Make sure not to return an negative value.
Signed-off-by: Tomonori Sakita tomonori.sakita@sord.co.jp Signed-off-by: Atsushi Nemoto atsushi.nemoto@sord.co.jp Reviewed-by: Dalon L Westergreen dalon.westergreen@linux.intel.com Acked-by: Thor Thayer thor.thayer@linux.intel.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/altera/altera_msgdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 0fb986ba32905..0ae723f753417 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -145,7 +145,8 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) & 0xffff;
if (inuse) { /* Tx FIFO is not empty */ - ready = priv->tx_prod - priv->tx_cons - inuse - 1; + ready = max_t(int, + priv->tx_prod - priv->tx_cons - inuse - 1, 0); } else { /* Check for buffered last packet */ status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status));
From: Yonglong Liu liuyonglong@huawei.com
[ Upstream commit 263c6d75f9a544a3c2f8f6a26de4f4808d8f59cf ]
In hns enet driver, we use of_parse_handle() to get hold of the device node related to "ae-handle" but we have missed to put the node reference using of_node_put() after we are done using the node. This patch fixes it.
Note: This problem is stated in Link: https://lkml.org/lkml/2018/12/22/217
Fixes: 48189d6aaf1e ("net: hns: enet specifies a reference to dsaf") Reported-by: Alexey Khoroshilov khoroshilov@ispras.ru Signed-off-by: Yonglong Liu liuyonglong@huawei.com Signed-off-by: Peng Li lipeng321@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 86662a14208eb..d30c28fba2499 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2532,6 +2532,8 @@ static int hns_nic_dev_probe(struct platform_device *pdev) out_notify_fail: (void)cancel_work_sync(&priv->service_task); out_read_prop_fail: + /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); free_netdev(ndev); return ret; } @@ -2561,6 +2563,9 @@ static int hns_nic_dev_remove(struct platform_device *pdev) set_bit(NIC_STATE_REMOVING, &priv->state); (void)cancel_work_sync(&priv->service_task);
+ /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); + free_netdev(ndev); return 0; }
From: Yonglong Liu liuyonglong@huawei.com
[ Upstream commit ed29ca8b9592562559c64d027fb5eb126e463e2c ]
The hns driver of earlier devices, when autoneg off, restart autoneg will return -EINVAL, so make the hns driver for the latest devices do the same.
Signed-off-by: Yonglong Liu liuyonglong@huawei.com Signed-off-by: Peng Li lipeng321@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index c1e947bb852ff..14df03f60e059 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1154,16 +1154,18 @@ static int hns_get_regs_len(struct net_device *net_dev) */ static int hns_nic_nway_reset(struct net_device *netdev) { - int ret = 0; struct phy_device *phy = netdev->phydev;
- if (netif_running(netdev)) { - /* if autoneg is disabled, don't restart auto-negotiation */ - if (phy && phy->autoneg == AUTONEG_ENABLE) - ret = genphy_restart_aneg(phy); - } + if (!netif_running(netdev)) + return 0;
- return ret; + if (!phy) + return -EOPNOTSUPP; + + if (phy->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + return genphy_restart_aneg(phy); }
static u32
From: Yonglong Liu liuyonglong@huawei.com
[ Upstream commit cec8abba13e6a26729dfed41019720068eeeff2b ]
When reading phy registers via Clause 45 MDIO protocol, after write address operation, the driver use another write address operation, so can not read the right value of any phy registers. This patch fixes it.
Signed-off-by: Yonglong Liu liuyonglong@huawei.com Signed-off-by: Peng Li lipeng321@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/hisilicon/hns_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 017e08452d8c0..baf5cc251f329 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -321,7 +321,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum) }
hns_mdio_cmd_write(mdio_dev, is_c45, - MDIO_C45_WRITE_ADDR, phy_id, devad); + MDIO_C45_READ, phy_id, devad); }
/* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/
From: Alexey Khoroshilov khoroshilov@ispras.ru
[ Upstream commit c69c29a1a0a8f68cd87e98ba4a5a79fb8ef2a58c ]
If phy_power_on() fails in rk_gmac_powerup(), clocks are left enabled.
Found by Linux Driver Verification project (linuxtesting.org).
Signed-off-by: Alexey Khoroshilov khoroshilov@ispras.ru Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 13133b30b575e..01787344f6e59 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1284,8 +1284,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) }
ret = phy_power_on(bsp_priv, true); - if (ret) + if (ret) { + gmac_clk_enable(bsp_priv, false); return ret; + }
pm_runtime_enable(dev); pm_runtime_get_sync(dev);
From: Florian Westphal fw@strlen.de
[ Upstream commit 2035f3ff8eaa29cfb5c8e2160b0f6e85eeb21a95 ]
Unlike ip(6)tables ebtables only counts user-defined chains.
The effect is that a 32bit ebtables binary on a 64bit kernel can do 'ebtables -N FOO' only after adding at least one rule, else the request fails with -EINVAL.
This is a similar fix as done in 3f1e53abff84 ("netfilter: ebtables: don't attempt to allocate 0-sized compat array").
Fixes: 7d7d7e02111e9 ("netfilter: compat: reject huge allocation requests") Reported-by: Francesco Ruggeri fruggeri@arista.com Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/bridge/netfilter/ebtables.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 22e4c15a1fc30..53392ac58b38f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2292,9 +2292,12 @@ static int compat_do_replace(struct net *net, void __user *user,
xt_compat_lock(NFPROTO_BRIDGE);
- ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); - if (ret < 0) - goto out_unlock; + if (tmp.nentries) { + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + if (ret < 0) + goto out_unlock; + } + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock;
From: Andrew Lunn andrew@lunn.ch
[ Upstream commit 7ae710f9f8b2cf95297e7bbfe1c09789a7dc43d4 ]
On SoC reset all GPIO interrupts are disable. However, if kexec is used to boot into a new kernel, the SoC does not experience a reset. Hence GPIO interrupts can be left enabled from the previous kernel. It is then possible for the interrupt to fire before an interrupt handler is registered, resulting in the kernel complaining of an "unexpected IRQ trap", the interrupt is never cleared, and so fires again, resulting in an interrupt storm.
Disable all GPIO interrupts before registering the GPIO IRQ chip.
Fixes: 7f2691a19627 ("gpio: vf610: add gpiolib/IRQ chip driver for Vybrid") Signed-off-by: Andrew Lunn andrew@lunn.ch Acked-by: Stefan Agner stefan@agner.ch Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpio/gpio-vf610.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index cbe9e06861de0..1309b444720e3 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -261,6 +261,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct resource *iores; struct gpio_chip *gc; + int i; int ret;
port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); @@ -300,6 +301,10 @@ static int vf610_gpio_probe(struct platform_device *pdev) if (ret < 0) return ret;
+ /* Mask all GPIO interrupts */ + for (i = 0; i < gc->ngpio; i++) + vf610_gpio_writel(0, port->base + PORT_PCR(i)); + /* Clear the interrupt status register for all GPIO's */ vf610_gpio_writel(~0, port->base + PORT_ISFR);
From: Fathi Boudra fathi.boudra@linaro.org
[ Upstream commit 7d4e591bc051d3382c45caaa2530969fb42ed23d ]
posix_timers fails to build due to undefined reference errors:
aarch64-linaro-linux-gcc --sysroot=/build/tmp-rpb-glibc/sysroots/hikey -O2 -pipe -g -feliminate-unused-debug-types -O3 -Wl,-no-as-needed -Wall -DKTEST -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed -lrt -lpthread posix_timers.c -o /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers /tmp/cc1FTZzT.o: In function `check_timer_create': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:157: undefined reference to `timer_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:170: undefined reference to `timer_settime' collect2: error: ld returned 1 exit status
It's GNU Make and linker specific.
The default Makefile rule looks like:
$(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS)
When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with.
More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html
LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead.
LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable.
https://lkml.org/lkml/2010/2/10/362
tools/perf: libraries must come after objects
Link order matters, use LDLIBS instead of LDFLAGS to properly link against libpthread.
Signed-off-by: Denys Dmytriyenko denys@ti.com Signed-off-by: Fathi Boudra fathi.boudra@linaro.org Signed-off-by: Shuah Khan shuah@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/timers/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 3496680981f20..d937e45532d83 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm
# these are all "safe" tests that don't modify # system time or require escalated privileges
From: Yao Liu yotta.liu@ucloud.cn
[ Upstream commit 80ff00172407e0aad4b10b94ef0816fc3e7813cb ]
There is a NULL pointer dereference of dev_name in nfs_parse_devname()
The oops looks something like:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... RIP: 0010:nfs_fs_mount+0x3b6/0xc20 [nfs] ... Call Trace: ? ida_alloc_range+0x34b/0x3d0 ? nfs_clone_super+0x80/0x80 [nfs] ? nfs_free_parsed_mount_data+0x60/0x60 [nfs] mount_fs+0x52/0x170 ? __init_waitqueue_head+0x3b/0x50 vfs_kern_mount+0x6b/0x170 do_mount+0x216/0xdc0 ksys_mount+0x83/0xd0 __x64_sys_mount+0x25/0x30 do_syscall_64+0x65/0x220 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Fix this by adding a NULL check on dev_name
Signed-off-by: Yao Liu yotta.liu@ucloud.cn Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/nfs/super.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 38de09b08e966..d0d0438a5b263 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1901,6 +1901,11 @@ static int nfs_parse_devname(const char *dev_name, size_t len; char *end;
+ if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']');
From: Manish Chopra manishc@marvell.com
[ Upstream commit 9e71a15d8b5bbce25c637f7f8833cd3f45b65646 ]
When running tx switched traffic between VNICs created via a bridge(to which VFs are added), adapter drops the unicast packets in tx flow due to VNIC's ucast mac being unknown to it. But VF interfaces being in promiscuous mode should have caused adapter to accept all the unknown ucast packets. Later, it was found that driver doesn't really configure tx promiscuous mode settings to accept all unknown unicast macs.
This patch fixes tx promiscuous mode settings to accept all unknown/unmatched unicast macs and works out the scenario.
Signed-off-by: Manish Chopra manishc@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 83c1c4fa102b7..5191b575d57b2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -607,6 +607,10 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn, (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED)));
+ SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, + (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); + SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL, !!(accept_filter & QED_ACCEPT_BCAST));
@@ -2640,7 +2644,8 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; - accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; + accept_flags.tx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | + QED_ACCEPT_MCAST_UNMATCHED; } else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
From: Manish Chopra manishc@marvell.com
[ Upstream commit ff9296966e5e00b0d0d00477b2365a178f0f06a3 ]
VF is always configured to drop control frames (with reserved mac addresses) but to work LACP on the VFs, it would require LACP control frames to be forwarded or transmitted successfully.
This patch fixes this in such a way that trusted VFs (marked through ndo_set_vf_trust) would be allowed to pass the control frames such as LACP pdus.
Signed-off-by: Manish Chopra manishc@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 5 +++++ drivers/net/ethernet/qlogic/qed/qed_l2.h | 3 +++ drivers/net/ethernet/qlogic/qed/qed_sriov.c | 10 ++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 5191b575d57b2..4ffdde755db7e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -747,6 +747,11 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, return rc; }
+ if (p_params->update_ctl_frame_check) { + p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en; + p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en; + } + /* Update mcast bins for VFs, PF doesn't use this functionality */ qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 91d383f3a661f..7c41142452a3b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -218,6 +218,9 @@ struct qed_sp_vport_update_params { struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; struct qed_sge_tpa_params *sge_tpa_params; + u8 update_ctl_frame_check; + u8 mac_chk_en; + u8 ethtype_chk_en; };
int qed_sp_vport_update(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index c6411158afd7b..65a53d409e773 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1963,7 +1963,9 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, params.vport_id = vf->vport_id; params.max_buffers_per_cqe = start->max_buffers_per_cqe; params.mtu = vf->mtu; - params.check_mac = true; + + /* Non trusted VFs should enable control frame filtering */ + params.check_mac = !vf->p_vf_info.is_trusted_configured;
rc = qed_sp_eth_vport_start(p_hwfn, ¶ms); if (rc) { @@ -4910,6 +4912,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.opaque_fid = vf->opaque_fid; params.vport_id = vf->vport_id;
+ params.update_ctl_frame_check = 1; + params.mac_chk_en = !vf_info->is_trusted_configured; + if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; flags->rx_accept_filter = vf_info->rx_accept_mode; @@ -4927,7 +4932,8 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) }
if (flags->update_rx_mode_config || - flags->update_tx_mode_config) + flags->update_tx_mode_config || + params.update_ctl_frame_check) qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); }
From: Manish Chopra manishc@marvell.com
[ Upstream commit 327852ec64205bb651be391a069784872098a3b2 ]
VFs may hit VF-PF channel timeout while probing, as in some cases it was observed that VF FLR and VF "acquire" message transaction (i.e first message from VF to PF in VF's probe flow) could occur simultaneously which could lead VF to fail sending "acquire" message to PF as VF is marked disabled from HW perspective due to FLR, which will result into channel timeout and VF probe failure.
In such cases, try retrying VF "acquire" message so that in later attempts it could be successful to pass message to PF after the VF FLR is completed and can be probed successfully.
Signed-off-by: Manish Chopra manishc@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index dd8ebf6d380f9..3220086f99dea 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -261,6 +261,7 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp; struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info; struct vf_pf_resc_request *p_resc; + u8 retry_cnt = VF_ACQUIRE_THRESH; bool resources_acquired = false; struct vfpf_acquire_tlv *req; int rc = 0, attempts = 0; @@ -314,6 +315,15 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
/* send acquire request */ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); + + /* Re-try acquire in case of vf-pf hw channel timeout */ + if (retry_cnt && rc == -EBUSY) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF retrying to acquire due to VPC timeout\n"); + retry_cnt--; + continue; + } + if (rc) goto exit;
From: Manish Chopra manishc@marvell.com
[ Upstream commit 7c81626a3c37e4ac320b8ad785694ba498f24794 ]
Cache number of fragments in the skb locally as in case of linear skb (with zero fragments), tx completion (or freeing of skb) may happen before driver tries to get number of frgaments from the skb which could lead to stale access to an already freed skb.
Signed-off-by: Manish Chopra manishc@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index b73bcbeb5f279..cec3ce390c701 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2295,19 +2295,24 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) { struct qed_ll2_tx_pkt_info pkt; const skb_frag_t *frag; + u8 flags = 0, nr_frags; int rc = -EINVAL, i; dma_addr_t mapping; u16 vlan = 0; - u8 flags = 0;
if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { DP_INFO(cdev, "Cannot transmit a checksumed packet\n"); return -EINVAL; }
- if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + /* Cache number of fragments from SKB since SKB may be freed by + * the completion routine after calling qed_ll2_prepare_tx_packet() + */ + nr_frags = skb_shinfo(skb)->nr_frags; + + if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", - 1 + skb_shinfo(skb)->nr_frags); + 1 + nr_frags); return -EINVAL; }
@@ -2329,7 +2334,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) }
memset(&pkt, 0, sizeof(pkt)); - pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags; + pkt.num_of_bds = 1 + nr_frags; pkt.vlan = vlan; pkt.bd_flags = flags; pkt.tx_dest = QED_LL2_TX_DEST_NW; @@ -2337,12 +2342,17 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) pkt.first_frag_len = skb->len; pkt.cookie = skb;
+ /* qed_ll2_prepare_tx_packet() may actually send the packet if + * there are no fragments in the skb and subsequently the completion + * routine may run and free the SKB, so no dereferencing the SKB + * beyond this point unless skb has any fragments. + */ rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, &pkt, 1); if (rc) goto err;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + for (i = 0; i < nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i];
mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0,
From: Manish Chopra manishc@marvell.com
[ Upstream commit ffb057f98928aa099b08e419bbe5afc26ec9f448 ]
KASAN reported following bug in qed_init_qm_get_idx_from_flags due to inappropriate casting of "pq_flags". Fix the type of "pq_flags".
[ 196.624707] BUG: KASAN: stack-out-of-bounds in qed_init_qm_get_idx_from_flags+0x1a4/0x1b8 [qed] [ 196.624712] Read of size 8 at addr ffff809b00bc7360 by task kworker/0:9/1712 [ 196.624714] [ 196.624720] CPU: 0 PID: 1712 Comm: kworker/0:9 Not tainted 4.18.0-60.el8.aarch64+debug #1 [ 196.624723] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL024 09/26/2018 [ 196.624733] Workqueue: events work_for_cpu_fn [ 196.624738] Call trace: [ 196.624742] dump_backtrace+0x0/0x2f8 [ 196.624745] show_stack+0x24/0x30 [ 196.624749] dump_stack+0xe0/0x11c [ 196.624755] print_address_description+0x68/0x260 [ 196.624759] kasan_report+0x178/0x340 [ 196.624762] __asan_report_load_n_noabort+0x38/0x48 [ 196.624786] qed_init_qm_get_idx_from_flags+0x1a4/0x1b8 [qed] [ 196.624808] qed_init_qm_info+0xec0/0x2200 [qed] [ 196.624830] qed_resc_alloc+0x284/0x7e8 [qed] [ 196.624853] qed_slowpath_start+0x6cc/0x1ae8 [qed] [ 196.624864] __qede_probe.isra.10+0x1cc/0x12c0 [qede] [ 196.624874] qede_probe+0x78/0xf0 [qede] [ 196.624879] local_pci_probe+0xc4/0x180 [ 196.624882] work_for_cpu_fn+0x54/0x98 [ 196.624885] process_one_work+0x758/0x1900 [ 196.624888] worker_thread+0x4e0/0xd18 [ 196.624892] kthread+0x2c8/0x350 [ 196.624897] ret_from_fork+0x10/0x18 [ 196.624899] [ 196.624902] Allocated by task 2: [ 196.624906] kasan_kmalloc.part.1+0x40/0x108 [ 196.624909] kasan_kmalloc+0xb4/0xc8 [ 196.624913] kasan_slab_alloc+0x14/0x20 [ 196.624916] kmem_cache_alloc_node+0x1dc/0x480 [ 196.624921] copy_process.isra.1.part.2+0x1d8/0x4a98 [ 196.624924] _do_fork+0x150/0xfa0 [ 196.624926] kernel_thread+0x48/0x58 [ 196.624930] kthreadd+0x3a4/0x5a0 [ 196.624932] ret_from_fork+0x10/0x18 [ 196.624934] [ 196.624937] Freed by task 0: [ 196.624938] (stack is not available) [ 196.624940] [ 196.624943] The buggy address belongs to the object at ffff809b00bc0000 [ 196.624943] which belongs to the cache thread_stack of size 32768 [ 196.624946] The buggy address is located 29536 bytes inside of [ 196.624946] 32768-byte region [ffff809b00bc0000, ffff809b00bc8000) [ 196.624948] The buggy address belongs to the page: [ 196.624952] page:ffff7fe026c02e00 count:1 mapcount:0 mapping:ffff809b4001c000 index:0x0 compound_mapcount: 0 [ 196.624960] flags: 0xfffff8000008100(slab|head) [ 196.624967] raw: 0fffff8000008100 dead000000000100 dead000000000200 ffff809b4001c000 [ 196.624970] raw: 0000000000000000 0000000000080008 00000001ffffffff 0000000000000000 [ 196.624973] page dumped because: kasan: bad access detected [ 196.624974] [ 196.624976] Memory state around the buggy address: [ 196.624980] ffff809b00bc7200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624983] ffff809b00bc7280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624985] >ffff809b00bc7300: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 04 f2 f2 f2 [ 196.624988] ^ [ 196.624990] ffff809b00bc7380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624993] ffff809b00bc7400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624995] ==================================================================
Signed-off-by: Manish Chopra manishc@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 16953c4ebd71b..410528e7d927c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -435,19 +435,19 @@ static void qed_init_qm_pq(struct qed_hwfn *p_hwfn,
/* get pq index according to PQ_FLAGS */ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, - u32 pq_flags) + unsigned long pq_flags) { struct qed_qm_info *qm_info = &p_hwfn->qm_info;
/* Can't have multiple flags set here */ - if (bitmap_weight((unsigned long *)&pq_flags, + if (bitmap_weight(&pq_flags, sizeof(pq_flags) * BITS_PER_BYTE) > 1) { - DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); + DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags); goto err; }
if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { - DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); + DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags); goto err; }
From: Ming Lu ming.lu@citrix.com
[ Upstream commit 5d8fc4a9f0eec20b6c07895022a6bea3fb6dfb38 ]
The issue to be fixed in this commit is when libfc found it received a invalid FLOGI response from FC switch, it would return without freeing the fc frame, which is just the skb data. This would cause memory leak if FC switch keeps sending invalid FLOGI responses.
This fix is just to make it execute `fc_frame_free(fp)` before returning from function `fc_lport_flogi_resp`.
Signed-off-by: Ming Lu ming.lu@citrix.com Reviewed-by: Hannes Reinecke hare@suse.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/libfc/fc_lport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 2fd0ec6511704..ca7967e390f19 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1739,14 +1739,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_payload_op(fp) != ELS_LS_ACC) { FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; }
flp = fc_frame_payload_get(fp, sizeof(*flp)); if (!flp) { FC_LPORT_DBG(lport, "FLOGI bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; }
mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1756,7 +1756,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, " "lport->mfs:%hu\n", mfs, lport->mfs); fc_lport_error(lport, fp); - goto err; + goto out; }
if (mfs <= lport->mfs) {
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit 8437fcf14deed67e5ad90b5e8abf62fb20f30881 ]
The "hostdata->dev" pointer is NULL here. We set "hostdata->dev = dev;" later in the function and we also use "hostdata->dev" when we call dma_free_attrs() in NCR_700_release().
This bug predates git version control.
Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/53c700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 6be77b3aa8a5f..ac79f2088b316 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -295,7 +295,7 @@ NCR_700_detect(struct scsi_host_template *tpnt, if(tpnt->sdev_attrs == NULL) tpnt->sdev_attrs = NCR_700_dev_attrs;
- memory = dma_alloc_attrs(hostdata->dev, TOTAL_MEM_SIZE, &pScript, + memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); if(memory == NULL) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n");
From: Sinan Kaya okaya@kernel.org
[ Upstream commit 0ee4b5f801b73b83a9fb3921d725f2162fd4a2e5 ]
Add BACKLIGHT_LCD_SUPPORT for SAMSUNG_Q10 to fix the warning: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE.
SAMSUNG_Q10 selects BACKLIGHT_CLASS_DEVICE but BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_LCD_SUPPORT.
Copy BACKLIGHT_LCD_SUPPORT dependency into SAMSUNG_Q10 to fix:
WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - SAMSUNG_Q10 [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y]
Signed-off-by: Sinan Kaya okaya@kernel.org Acked-by: Andy Shevchenko andy.shevchenko@gmail.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 80b87954f6ddf..09035705d0a07 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -999,6 +999,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" depends on ACPI + depends on BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10
From: Ronnie Sahlberg lsahlber@redhat.com
[ Upstream commit 58d15ed1203f4d858c339ea4d7dafa94bd2a56d3 ]
The size of the fixed part of the create response is 88 bytes not 56.
Signed-off-by: Ronnie Sahlberg lsahlber@redhat.com Signed-off-by: Steve French stfrench@microsoft.com Reviewed-by: Pavel Shilovsky pshilov@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e524540597256..bad458a2b579e 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,8 @@
#define NUMBER_OF_SMB2_COMMANDS 0x0013
-/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ -#define MAX_SMB2_HDR_SIZE 0x00b0 +/* 52 transform hdr + 64 hdr + 88 create rsp */ +#define MAX_SMB2_HDR_SIZE 204
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
From: Jianchao Wang jianchao.w.wang@oracle.com
[ Upstream commit 85bd6e61f34dffa8ec2dc75ff3c02ee7b2f1cbce ]
Florian reported a io hung issue when fsync(). It should be triggered by following race condition.
data + post flush a flush
blk_flush_complete_seq case REQ_FSEQ_DATA blk_flush_queue_rq issued to driver blk_mq_dispatch_rq_list try to issue a flush req failed due to NON-NCQ command .queue_rq return BLK_STS_DEV_RESOURCE
request completion req->end_io // doesn't check RESTART mq_flush_data_end_io case REQ_FSEQ_POSTFLUSH blk_kick_flush do nothing because previous flush has not been completed blk_mq_run_hw_queue insert rq to hctx->dispatch due to RESTART is still set, do nothing
To fix this, replace the blk_mq_run_hw_queue in mq_flush_data_end_io with blk_mq_sched_restart to check and clear the RESTART flag.
Fixes: bd166ef1 (blk-mq-sched: add framework for MQ capable IO schedulers) Reported-by: Florian Stecker m19@florianstecker.de Tested-by: Florian Stecker m19@florianstecker.de Signed-off-by: Jianchao Wang jianchao.w.wang@oracle.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c index 4938bec8cfef9..6603352879e73 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -402,7 +402,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
- blk_mq_run_hw_queue(hctx, true); + blk_mq_sched_restart(hctx); }
/**
From: Thomas Lendacky Thomas.Lendacky@amd.com
[ Upstream commit 912139cfbfa6a2bc1da052314d2c29338dae1f6a ]
The load_microcode_amd() function searches for microcode patches and attempts to apply a microcode patch if it is of different level than the currently installed level.
While the processor won't actually load a level that is less than what is already installed, the logic wrongly returns UCODE_NEW thus signaling to its caller reload_store() that a late loading should be attempted.
If the file-system contains an older microcode revision than what is currently running, such a late microcode reload can result in these misleading messages:
x86/CPU: CPU features have changed after loading microcode, but might not take effect. x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.
These messages were issued on a system where SME/SEV are not enabled by the BIOS (MSR C001_0010[23] = 0b) because during boot, early_detect_mem_encrypt() is called and cleared the SME and SEV features in this case.
However, after the wrong late load attempt, get_cpu_cap() is called and reloads the SME and SEV feature bits, resulting in the messages.
Update the microcode level check to not attempt microcode loading if the current level is greater than(!) and not only equal to the current patch level.
[ bp: massage commit message. ]
Fixes: 2613f36ed965 ("x86/microcode: Attempt late loading only when new microcode is present") Signed-off-by: Tom Lendacky thomas.lendacky@amd.com Signed-off-by: Borislav Petkov bp@suse.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Thomas Gleixner tglx@linutronix.de Cc: x86-ml x86@kernel.org Link: https://lkml.kernel.org/r/154894518427.9406.8246222496874202773.stgit@tlenda... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9d33dbf2489e2..d0a61d3e2fb94 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -707,7 +707,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) if (!p) { return ret; } else { - if (boot_cpu_data.microcode == p->patch_id) + if (boot_cpu_data.microcode >= p->patch_id) return ret;
ret = UCODE_NEW;
From: James Morse james.morse@arm.com
[ Upstream commit f2b3d8566d81deaca31f4e3163def0bea7746e11 ]
On systems with VHE the kernel and KVM's world-switch code run at the same exception level. Code that is only used on a VHE system does not need to be annotated as __hyp_text as it can reside anywhere in the kernel text.
__hyp_text was also used to prevent kprobes from patching breakpoint instructions into this region, as this code runs at a different exception level. While this is no longer true with VHE, KVM still switches VBAR_EL1, meaning a kprobe's breakpoint executed in the world-switch code will cause a hyp-panic.
Move the __hyp_text check in the kprobes blacklist so it applies on VHE systems too, to cover the common code and guest enter/exit assembly.
Fixes: 888b3c8720e0 ("arm64: Treat all entry code as non-kprobe-able") Reviewed-by: Christoffer Dall christoffer.dall@arm.com Signed-off-by: James Morse james.morse@arm.com Acked-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/kernel/probes/kprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 0417c929d21a0..7d8c33279e9fd 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -554,13 +554,13 @@ bool arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__entry_text_end) || (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || + (addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || !!search_exception_tables(addr)) return true;
if (!is_kernel_in_hyp_mode()) { - if ((addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - (addr >= (unsigned long)__hyp_idmap_text_start && + if ((addr >= (unsigned long)__hyp_idmap_text_start && addr < (unsigned long)__hyp_idmap_text_end)) return true; }
From: John Johansen john.johansen@canonical.com
[ Upstream commit d6d478aee003e19ef90321176552a8ad2929a47f ]
aa_label_merge() can return NULL for memory allocations failures make sure to handle and set the correct error in this case.
Reported-by: Peng Hao peng.hao2@zte.com.cn Signed-off-by: John Johansen john.johansen@canonical.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/apparmor/domain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index dd754b7850a82..67bf8b7ee8a28 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1260,7 +1260,10 @@ int aa_change_profile(const char *fqname, int flags) aa_get_label(&profile->label)); if (IS_ERR_OR_NULL(new)) { info = "failed to build target label"; - error = PTR_ERR(new); + if (!new) + error = -ENOMEM; + else + error = PTR_ERR(new); new = NULL; perms.allow = 0; goto audit;
From: Kairui Song kasong@redhat.com
[ Upstream commit 2aa958c99c7fd3162b089a1a56a34a0cdb778de1 ]
Kexec-ing a kernel with "efi=noruntime" on the first kernel's command line causes the following null pointer dereference:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] Call Trace: efi_runtime_map_copy+0x28/0x30 bzImage64_load+0x688/0x872 arch_kexec_kernel_image_load+0x6d/0x70 kimage_file_alloc_init+0x13e/0x220 __x64_sys_kexec_file_load+0x144/0x290 do_syscall_64+0x55/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Just skip the EFI info setup if EFI runtime services are not enabled.
[ bp: Massage commit message. ]
Suggested-by: Dave Young dyoung@redhat.com Signed-off-by: Kairui Song kasong@redhat.com Signed-off-by: Borislav Petkov bp@suse.de Acked-by: Dave Young dyoung@redhat.com Cc: AKASHI Takahiro takahiro.akashi@linaro.org Cc: Andrew Morton akpm@linux-foundation.org Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: bhe@redhat.com Cc: David Howells dhowells@redhat.com Cc: erik.schmauss@intel.com Cc: fanc.fnst@cn.fujitsu.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: kexec@lists.infradead.org Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: Philipp Rudo prudo@linux.vnet.ibm.com Cc: rafael.j.wysocki@intel.com Cc: robert.moore@intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: x86-ml x86@kernel.org Cc: Yannik Sembritzki yannik@sembritzki.me Link: https://lkml.kernel.org/r/20190118111310.29589-2-kasong@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/kexec-bzimage64.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 928b0c6083c9c..4d948d87f01cc 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, struct efi_info *current_ei = &boot_params.efi_info; struct efi_info *ei = ¶ms->efi_info;
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + if (!current_ei->efi_memmap_size) return 0;
From: Qian Cai cai@lca.pw
[ Upstream commit a8e911d13540487942d53137c156bd7707f66e5d ]
If the kernel is configured with KASAN_EXTRA, the stack size is increasted significantly because this option sets "-fstack-reuse" to "none" in GCC [1]. As a result, it triggers stack overrun quite often with 32k stack size compiled using GCC 8. For example, this reproducer
https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/sysca...
triggers a "corrupted stack end detected inside scheduler" very reliably with CONFIG_SCHED_STACK_END_CHECK enabled.
There are just too many functions that could have a large stack with KASAN_EXTRA due to large local variables that have been called over and over again without being able to reuse the stacks. Some noticiable ones are
size 7648 shrink_page_list 3584 xfs_rmap_convert 3312 migrate_page_move_mapping 3312 dev_ethtool 3200 migrate_misplaced_transhuge_page 3168 copy_process
There are other 49 functions are over 2k in size while compiling kernel with "-Wframe-larger-than=" even with a related minimal config on this machine. Hence, it is too much work to change Makefiles for each object to compile without "-fsanitize-address-use-after-scope" individually.
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715#c23
Although there is a patch in GCC 9 to help the situation, GCC 9 probably won't be released in a few months and then it probably take another 6-month to 1-year for all major distros to include it as a default. Hence, the stack usage with KASAN_EXTRA can be revisited again in 2020 when GCC 9 is everywhere. Until then, this patch will help users avoid stack overrun.
This has already been fixed for arm64 for the same reason via 6e8830674ea ("arm64: kasan: Increase stack size for KASAN_EXTRA").
Link: http://lkml.kernel.org/r/20190109215209.2903-1-cai@lca.pw Signed-off-by: Qian Cai cai@lca.pw Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@redhat.com Cc: Borislav Petkov bp@alien8.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Alexander Potapenko glider@google.com Cc: Dmitry Vyukov dvyukov@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/include/asm/page_64_types.h | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 74d531f6d5180..50c8baaca4b06 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -7,7 +7,11 @@ #endif
#ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_EXTRA +#define KASAN_STACK_ORDER 2 +#else #define KASAN_STACK_ORDER 1 +#endif #else #define KASAN_STACK_ORDER 0 #endif
From: Michal Hocko mhocko@suse.com
[ Upstream commit efad4e475c312456edb3c789d0996d12ed744c13 ]
Patch series "mm, memory_hotplug: fix uninitialized pages fallouts", v2.
Mikhail Zaslonko has posted fixes for the two bugs quite some time ago [1]. I have pushed back on those fixes because I believed that it is much better to plug the problem at the initialization time rather than play whack-a-mole all over the hotplug code and find all the places which expect the full memory section to be initialized.
We have ended up with commit 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") merged and cause a regression [2][3]. The reason is that there might be memory layouts when two NUMA nodes share the same memory section so the merged fix is simply incorrect.
In order to plug this hole we really have to be zone range aware in those handlers. I have split up the original patch into two. One is unchanged (patch 2) and I took a different approach for `removable' crash.
[1] http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com [2] https://bugzilla.redhat.com/show_bug.cgi?id=1666948 [3] http://lkml.kernel.org/r/20190125163938.GA20411@dhcp22.suse.cz
This patch (of 2):
Mikhail has reported the following VM_BUG_ON triggered when reading sysfs removable state of a memory block:
page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: is_mem_section_removable+0xb4/0x190 show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops
The reason is that the memory block spans the zone boundary and we are stumbling over an unitialized struct page. Fix this by enforcing zone range in is_mem_section_removable so that we never run away from a zone.
Link: http://lkml.kernel.org/r/20190128144506.15603-2-mhocko@kernel.org Signed-off-by: Michal Hocko mhocko@suse.com Reported-by: Mikhail Zaslonko zaslonko@linux.ibm.com Debugged-by: Mikhail Zaslonko zaslonko@linux.ibm.com Tested-by: Gerald Schaefer gerald.schaefer@de.ibm.com Tested-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Reviewed-by: Oscar Salvador osalvador@suse.de Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/memory_hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c7c74a927d6f8..39db89f3df657 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1256,7 +1256,8 @@ static struct page *next_active_pageblock(struct page *page) bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); + struct page *end_page = pfn_to_page(end_pfn);
/* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) {
From: Mikhail Zaslonko zaslonko@linux.ibm.com
[ Upstream commit 24feb47c5fa5b825efb0151f28906dfdad027e61 ]
If memory end is not aligned with the sparse memory section boundary, the mapping of such a section is only partly initialized. This may lead to VM_BUG_ON due to uninitialized struct pages access from test_pages_in_a_zone() function triggered by memory_hotplug sysfs handlers.
Here are the the panic examples: CONFIG_DEBUG_VM_PGFLAGS=y kernel parameter mem=2050M -------------------------- page:000003d082008000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: test_pages_in_a_zone+0xde/0x160 show_valid_zones+0x5c/0x190 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: test_pages_in_a_zone+0xde/0x160 Kernel panic - not syncing: Fatal exception: panic_on_oops
Fix this by checking whether the pfn to check is within the zone.
[mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Link: http://lkml.kernel.org/r/20190128144506.15603-3-mhocko@kernel.org
[mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Signed-off-by: Michal Hocko mhocko@suse.com Signed-off-by: Mikhail Zaslonko zaslonko@linux.ibm.com Tested-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Reviewed-by: Oscar Salvador osalvador@suse.de Tested-by: Gerald Schaefer gerald.schaefer@de.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/memory_hotplug.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 39db89f3df657..c9d3a49bd4e20 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1297,6 +1297,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, i++; if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) continue; + /* Check if we got outside of the zone */ + if (zone && !zone_spans_pfn(zone, pfn + i)) + return 0; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0;
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit db7ddeab3ce5d64c9696e70d61f45ea9909cd196 ]
There is a copy and paste bug so we set "config->test_driver" to NULL twice instead of setting "config->test_fs". Smatch complains that it leads to a double free:
lib/test_kmod.c:840 __kmod_config_init() warn: 'config->test_fs' double freed
Link: http://lkml.kernel.org/r/20190121140011.GA14283@kadam Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Acked-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 7abb59ce6613a..cf619795a1822 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config) config->test_driver = NULL;
kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; }
static void kmod_config_free(struct kmod_test_device *test_dev)
From: Jan Kara jack@suse.cz
[ Upstream commit c27d82f52f75fc9d8d9d40d120d2a96fdeeada5e ]
When superblock has lots of inodes without any pagecache (like is the case for /proc), drop_pagecache_sb() will iterate through all of them without dropping sb->s_inode_list_lock which can lead to softlockups (one of our customers hit this).
Fix the problem by going to the slow path and doing cond_resched() in case the process needs rescheduling.
Link: http://lkml.kernel.org/r/20190114085343.15011-1-jack@suse.cz Signed-off-by: Jan Kara jack@suse.cz Acked-by: Michal Hocko mhocko@suse.com Reviewed-by: Andrew Morton akpm@linux-foundation.org Cc: Al Viro viro@ZenIV.linux.org.uk Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/drop_caches.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 82377017130f0..d31b6c72b4764 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); + /* + * We must skip inodes in unusual state. We may also skip + * inodes without pages but we deliberately won't in case + * we need to reschedule to avoid softlockups. + */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (inode->i_mapping->nrpages == 0)) { + (inode->i_mapping->nrpages == 0 && !need_resched())) { spin_unlock(&inode->i_lock); continue; } @@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock);
+ cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode;
From: Pan Bian bianpan2016@163.com
[ Upstream commit 63ce5f552beb9bdb41546b3a26c4374758b21815 ]
autofs_expire_run() calls dput(dentry) to drop the reference count of dentry. However, dentry is read via autofs_dentry_ino(dentry) after that. This may result in a use-free-bug. The patch drops the reference count of dentry only when it is never used.
Link: http://lkml.kernel.org/r/154725122396.11260.16053424107144453867.stgit@pluto... Signed-off-by: Pan Bian bianpan2016@163.com Signed-off-by: Ian Kent raven@themaw.net Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/autofs4/expire.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 57725d4a8c59e..141f9bc213a3d 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -567,7 +567,6 @@ int autofs4_expire_run(struct super_block *sb, pkt.len = dentry->d_name.len; memcpy(pkt.name, dentry->d_name.name, pkt.len); pkt.name[pkt.len] = '\0'; - dput(dentry);
if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) ret = -EFAULT; @@ -580,6 +579,8 @@ int autofs4_expire_run(struct super_block *sb, complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock);
+ dput(dentry); + return ret; }
From: Ian Kent raven@themaw.net
[ Upstream commit f585b283e3f025754c45bbe7533fc6e5c4643700 ]
In autofs_fill_super() on error of get inode/make root dentry the return should be ENOMEM as this is the only failure case of the called functions.
Link: http://lkml.kernel.org/r/154725123240.11260.796773942606871359.stgit@pluto-t... Signed-off-by: Ian Kent raven@themaw.net Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/autofs4/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 3c7e727612fa3..e455388a939ce 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -259,8 +259,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode = autofs4_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); - if (!root) + if (!root) { + ret = -ENOMEM; goto fail_ino; + } pipe = NULL;
root->d_fsdata = ino;
linux-stable-mirror@lists.linaro.org