From: Xin Long lucien.xin@gmail.com
[ Upstream commit cebe84c6190d741045a322f5343f717139993c08 ]
Now when ip route flush cache and it turn out all fnhe_genid != genid. If a redirect/pmtu icmp packet comes and the old fnhe is found and all it's members but fnhe_genid will be updated.
Then next time when it looks up route and tries to rebind this fnhe to the new dst, the fnhe will be flushed due to fnhe_genid != genid. It causes this redirect/pmtu icmp packet acutally not to be applied.
This patch is to also reset fnhe_genid when updating a route cache.
Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") Acked-by: Hannes Frederic Sowa hannes@stressinduktion.org Signed-off-by: Xin Long lucien.xin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/ipv4/route.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2fe9459fbe24..a04d29d70e6a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -622,9 +622,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; + u32 genid, hval; unsigned int i; int depth; - u32 hval = fnhe_hashfun(daddr); + + genid = fnhe_genid(dev_net(nh->nh_dev)); + hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
@@ -647,6 +650,8 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, }
if (fnhe) { + if (fnhe->fnhe_genid != genid) + fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; if (pmtu) { @@ -671,7 +676,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } - fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); + fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu;
From: Xin Long lucien.xin@gmail.com
[ Upstream commit e39d5246111399dbc6e11cd39fd8580191b86c47 ]
Now when creating fnhe for redirect, it sets fnhe_expires for this new route cache. But when updating the exist one, it doesn't do it. It will cause this fnhe never to be expired.
Paolo already noticed it before, in Jianlin's test case, it became even worse:
When ip route flush cache, the old fnhe is not to be removed, but only clean it's members. When redirect comes again, this fnhe will be found and updated, but never be expired due to fnhe_expires not being set.
So fix it by simply updating fnhe_expires even it's for redirect.
Fixes: aee06da6726d ("ipv4: use seqlock for nh_exceptions") Reported-by: Jianlin Shi jishi@redhat.com Acked-by: Hannes Frederic Sowa hannes@stressinduktion.org Signed-off-by: Xin Long lucien.xin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/ipv4/route.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a04d29d70e6a..79c9bacafc61 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -654,10 +654,9 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; - if (pmtu) { + if (pmtu) fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = max(1UL, expires); - } + fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt)
From: Stephen Bates sbates@raithlin.com
[ Upstream commit 36a3d1dd4e16bcd0d2ddfb4a2ec7092f0ae0d931 ]
If the amount of resources allocated to a gen_pool exceeds 2^32 then the avail atomic overflows and this causes problems when clients try and borrow resources from the pool. This is only expected to be an issue on 64 bit systems.
Add the <linux/atomic.h> header to pull in atomic_long* operations. So that 32 bit systems continue to use atomic32_t but 64 bit systems can use atomic64_t.
Link: http://lkml.kernel.org/r/1509033843-25667-1-git-send-email-sbates@raithlin.c... Signed-off-by: Stephen Bates sbates@raithlin.com Reviewed-by: Logan Gunthorpe logang@deltatee.com Reviewed-by: Mathieu Desnoyers mathieu.desnoyers@efficios.com Reviewed-by: Daniel Mentz danielmentz@google.com Cc: Jonathan Corbet corbet@lwn.net Cc: Andrew Morton akpm@linux-foundation.org Cc: Will Deacon will.deacon@arm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- include/linux/genalloc.h | 3 ++- lib/genalloc.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 1ccaab44abcc..ec78cd93c0c1 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -31,6 +31,7 @@ #define __GENALLOC_H__
#include <linux/spinlock_types.h> +#include <linux/atomic.h>
struct device; struct device_node; @@ -66,7 +67,7 @@ struct gen_pool { */ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ - atomic_t avail; + atomic_long_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ diff --git a/lib/genalloc.c b/lib/genalloc.c index 2e65d206b01c..c627c3f19935 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size - 1; - atomic_set(&chunk->avail, size); + atomic_long_set(&chunk->avail, size);
spin_lock(&pool->lock); list_add_rcu(&chunk->next_chunk, &pool->chunks); @@ -285,7 +285,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) nbits = (size + (1UL << order) - 1) >> order; rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (size > atomic_read(&chunk->avail)) + if (size > atomic_long_read(&chunk->avail)) continue;
end_bit = chunk_size(chunk) >> order; @@ -304,7 +304,7 @@ retry:
addr = chunk->start_addr + ((unsigned long)start_bit << order); size = nbits << order; - atomic_sub(size, &chunk->avail); + atomic_long_sub(size, &chunk->avail); break; } rcu_read_unlock(); @@ -370,7 +370,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); BUG_ON(remain); size = nbits << order; - atomic_add(size, &chunk->avail); + atomic_long_add(size, &chunk->avail); rcu_read_unlock(); return; } @@ -444,7 +444,7 @@ size_t gen_pool_avail(struct gen_pool *pool)
rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - avail += atomic_read(&chunk->avail); + avail += atomic_long_read(&chunk->avail); rcu_read_unlock(); return avail; }
From: Trond Myklebust trond.myklebust@primarydata.com
[ Upstream commit d803224c84be067754db7fa58a93f36f61566493 ]
On successful rename, the "old_dentry" is retained and is attached to the "new_dir", so we need to call nfs_set_verifier() accordingly.
Signed-off-by: Trond Myklebust trond.myklebust@primarydata.com Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 43d63a4d9a92..08b8844544d7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2062,7 +2062,7 @@ out: if (new_inode != NULL) nfs_drop_nlink(new_inode); d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, + nfs_set_verifier(old_dentry, nfs_save_change_attribute(new_dir)); } else if (error == -ENOENT) nfs_dentry_handle_enoent(old_dentry);
From: Randy Dunlap rdunlap@infradead.org
[ Upstream commit 1f3c790bd5989fcfec9e53ad8fa09f5b740c958f ]
line-range is supposed to treat "1-" as "1-endoffile", so handle the special case by setting last_lineno to UINT_MAX.
Fixes this error:
dynamic_debug:ddebug_parse_query: last-line:0 < 1st-line:1 dynamic_debug:ddebug_exec_query: query parse failed
Link: http://lkml.kernel.org/r/10a6a101-e2be-209f-1f41-54637824788e@infradead.org Signed-off-by: Randy Dunlap rdunlap@infradead.org Acked-by: Jason Baron jbaron@akamai.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- lib/dynamic_debug.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index dfba05521748..daec2e93a9ab 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -353,6 +353,10 @@ static int ddebug_parse_query(char *words[], int nwords, if (parse_lineno(last, &query->last_lineno) < 0) return -EINVAL;
+ /* special case for last lineno not specified */ + if (query->last_lineno == 0) + query->last_lineno = UINT_MAX; + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno,
From: Chuck Lever chuck.lever@oracle.com
[ Upstream commit b2bfe5915d5fe7577221031a39ac722a0a2a1199 ]
The rpc_task_begin trace point always display a task ID of zero. Move the trace point call site so that it picks up the new task ID.
Signed-off-by: Chuck Lever chuck.lever@oracle.com Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/sunrpc/sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fe3441abdbe5..73cb858dad84 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -273,10 +273,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
static void rpc_set_active(struct rpc_task *task) { - trace_rpc_task_begin(task->tk_client, task, NULL); - rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); + trace_rpc_task_begin(task->tk_client, task, NULL); }
/*
From: Xin Long lucien.xin@gmail.com
[ Upstream commit cea0cc80a6777beb6eb643d4ad53690e1ad1d4ff ]
Commit dfcb9f4f99f1 ("sctp: deny peeloff operation on asocs with threads sleeping on it") fixed the race between peeloff and wait sndbuf by checking waitqueue_active(&asoc->wait) in sctp_do_peeloff().
But it actually doesn't work, as even if waitqueue_active returns false the waiting sndbuf thread may still not yet hold sk lock. After asoc is peeled off, sk is not asoc->base.sk any more, then to hold the old sk lock couldn't make assoc safe to access.
This patch is to fix this by changing to hold the new sk lock if sk is not asoc->base.sk, meanwhile, also set the sk in sctp_sendmsg with the new sk.
With this fix, there is no more race between peeloff and waitbuf, the check 'waitqueue_active' in sctp_do_peeloff can be removed.
Thanks Marcelo and Neil for making this clear.
v1->v2: fix it by changing to lock the new sock instead of adding a flag in asoc.
Suggested-by: Neil Horman nhorman@tuxdriver.com Signed-off-by: Xin Long lucien.xin@gmail.com Acked-by: Neil Horman nhorman@tuxdriver.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/sctp/socket.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 732647ca346e..8e019692bb42 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -82,8 +82,8 @@ /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); -static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, - size_t msg_len); +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, + size_t msg_len, struct sock **orig_sk); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -1949,7 +1949,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + /* sk can be changed by peel off when waiting for buf. */ + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); if (err) { if (err == -ESRCH) { /* asoc is already dead. */ @@ -4479,12 +4480,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) if (!asoc) return -EINVAL;
- /* If there is a thread waiting on more sndbuf space for - * sending on this asoc, it cannot be peeled. - */ - if (waitqueue_active(&asoc->wait)) - return -EBUSY; - /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. */ @@ -6988,7 +6983,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len) + size_t msg_len, struct sock **orig_sk) { struct sock *sk = asoc->base.sk; int err = 0; @@ -7022,11 +7017,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); + if (sk != asoc->base.sk) { + release_sock(sk); + sk = asoc->base.sk; + lock_sock(sk); + }
*timeo_p = current_timeo; }
out: + *orig_sk = sk; finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
From: Pavel Tatashin pasha.tatashin@oracle.com
[ Upstream commit 2a20aa171071a334d80c4e5d5af719d8374702fc ]
Without deferred struct page feature (CONFIG_DEFERRED_STRUCT_PAGE_INIT), flags and other fields in "struct page"es are never changed prior to first initializing struct pages by going through __init_single_page().
With deferred struct page feature enabled there is a case where we set some fields prior to initializing:
mem_init() { register_page_bootmem_info(); free_all_bootmem(); ... }
When register_page_bootmem_info() is called only non-deferred struct pages are initialized. But, this function goes through some reserved pages which might be part of the deferred, and thus are not yet initialized.
mem_init register_page_bootmem_info register_page_bootmem_info_node get_page_bootmem .. setting fields here .. such as: page->freelist = (void *)type;
free_all_bootmem() free_low_memory_core_early() for_each_reserved_mem_region() reserve_bootmem_region() init_reserved_page() <- Only if this is deferred reserved page __init_single_pfn() __init_single_page() memset(0) <-- Loose the set fields here
We end up with similar issue as in the previous patch, where currently we do not observe problem as memory is zeroed. But, if flag asserts are changed we can start hitting issues.
Also, because in this patch series we will stop zeroing struct page memory during allocation, we must make sure that struct pages are properly initialized prior to using them.
The deferred-reserved pages are initialized in free_all_bootmem(). Therefore, the fix is to switch the above calls.
Link: http://lkml.kernel.org/r/20171013173214.27300-4-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin pasha.tatashin@oracle.com Reviewed-by: Steven Sistare steven.sistare@oracle.com Reviewed-by: Daniel Jordan daniel.m.jordan@oracle.com Reviewed-by: Bob Picco bob.picco@oracle.com Acked-by: David S. Miller davem@davemloft.net Acked-by: Michal Hocko mhocko@suse.com Cc: Alexander Potapenko glider@google.com Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Matthew Wilcox willy@infradead.org Cc: Mel Gorman mgorman@techsingularity.net Cc: Michal Hocko mhocko@kernel.org Cc: Sam Ravnborg sam@ravnborg.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Will Deacon will.deacon@arm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/sparc/mm/init_64.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 7ad1baa6c914..1f4492f6efb0 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2215,10 +2215,17 @@ void __init mem_init(void) { high_memory = __va(last_valid_pfn << PAGE_SHIFT);
- register_page_bootmem_info(); free_all_bootmem();
/* + * Must be done after boot memory is put on freelist, because here we + * might set fields in deferred struct pages that have not yet been + * initialized, and free_all_bootmem() initializes all the reserved + * deferred pages for us. + */ + register_page_bootmem_info(); + + /* * Set up the zero page, mark it reserved, so that page count * is not manipulated when freeing the page from user ptes. */
From: Xin Long lucien.xin@gmail.com
[ Upstream commit ca3af4dd28cff4e7216e213ba3b671fbf9f84758 ]
Now in sctp_sendmsg sctp_wait_for_sndbuf could schedule out without holding sock sk. It means the current asoc can be freed elsewhere, like when receiving an abort packet.
If the asoc is just created in sctp_sendmsg and sctp_wait_for_sndbuf returns err, the asoc will be freed again due to new_asoc is not nil. An use-after-free issue would be triggered by this.
This patch is to fix it by setting new_asoc with nil if the asoc is already dead when cpu schedules back, so that it will not be freed again in sctp_sendmsg.
v1->v2: set new_asoc as nil in sctp_sendmsg instead of sctp_wait_for_sndbuf.
Suggested-by: Neil Horman nhorman@tuxdriver.com Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Xin Long lucien.xin@gmail.com Acked-by: Neil Horman nhorman@tuxdriver.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/sctp/socket.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3f89cd063246..732647ca346e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1950,8 +1950,14 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); - if (err) + if (err) { + if (err == -ESRCH) { + /* asoc is already dead. */ + new_asoc = NULL; + err = -EPIPE; + } goto out_free; + } }
/* If an address is passed with the sendto/sendmsg call, it is used @@ -6999,10 +7005,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, for (;;) { prepare_to_wait_exclusive(&asoc->wait, &wait, TASK_INTERRUPTIBLE); + if (asoc->base.dead) + goto do_dead; if (!*timeo_p) goto do_nonblock; - if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING || - asoc->base.dead) + if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING) goto do_error; if (signal_pending(current)) goto do_interrupted; @@ -7027,6 +7034,10 @@ out:
return err;
+do_dead: + err = -ESRCH; + goto out; + do_error: err = -EPIPE; goto out;
From: Arvind Yadav arvind.yadav.cs@gmail.com
[ Upstream commit bde533f2ea607cbbbe76ef8738b36243939a7bc2 ]
atm_dev_register() can fail here and passed parameters to free irq which is not initialised. Initialization of 'dev->irq' happened after the 'goto out_free_irq'. So using 'irq' insted of 'dev->irq' in free_irq().
Signed-off-by: Arvind Yadav arvind.yadav.cs@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/atm/horizon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 1dc0519333f2..27a9fa2718f7 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2828,7 +2828,7 @@ out: return err;
out_free_irq: - free_irq(dev->irq, dev); + free_irq(irq, dev); out_free: kfree(dev); out_release:
From: Mark Bloch markb@mellanox.com
[ Upstream commit 5f22a1d87c5315a98981ecf93cd8de226cffe6ca ]
Maximal message should be used as a limit to the max message payload allowed, without the headers. The ConnectX-3 check is done against this value includes the headers. When the payload is 4K this will cause the NIC to drop packets.
Increase maximal message to 8K as workaround, this shouldn't change current behaviour because we continue to set the MTU to 4k.
To reproduce; set MTU to 4296 on the corresponding interface, for example: ifconfig eth0 mtu 4296 (both server and client)
On server: ib_send_bw -c UD -d mlx4_0 -s 4096 -n 1000000 -i1 -m 4096
On client: ib_send_bw -d mlx4_0 -c UD <server_ip> -s 4096 -n 1000000 -i 1 -m 4096
Fixes: 6e0d733d9215 ("IB/mlx4: Allow 4K messages for UD QPs") Signed-off-by: Mark Bloch markb@mellanox.com Reviewed-by: Majd Dibbiny majd@mellanox.com Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 33cbb7611d66..890bbf984748 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1465,7 +1465,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else - context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + context->mtu_msgmax = (IB_MTU_4096 << 5) | 13; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { pr_err("path MTU (%u) is invalid\n",
From: Herbert Xu herbert@gondor.apana.org.au
[ Upstream commit 0e74aa1d79a5bbc663e03a2804399cae418a0321 ]
The syzbot found an ancient bug in the IPsec code. When we cloned a socket policy (for example, for a child TCP socket derived from a listening socket), we did not copy the family field. This results in a live policy with a zero family field. This triggers a BUG_ON check in the af_key code when the cloned policy is retrieved.
This patch fixes it by copying the family field over.
Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 253e7dda287b..fb84ff4aeee7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1345,6 +1345,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; newp->type = old->type; + newp->family = old->family; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&net->xfrm.xfrm_policy_lock);
From: David Howells dhowells@redhat.com
[ Upstream commit f4b3526d83c40dd8bf5948b9d7a1b2c340f0dcc8 ]
The handler for the CB.ProbeUuid operation in the cache manager is implemented, but isn't listed in the switch-statement of operation selection, so won't be used. Fix this by adding it.
Signed-off-by: David Howells dhowells@redhat.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- fs/afs/cmservice.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 4b0eff6da674..83a8a33a0d73 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -115,6 +115,9 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; + case CBProbeUuid: + call->type = &afs_SRXCBProbeUuid; + return true; case CBTellMeAboutYourself: call->type = &afs_SRXCBTellMeAboutYourself; return true;
From: Majd Dibbiny majd@mellanox.com
[ Upstream commit 31fde034a8bd964a5c7c1a5663fc87a913158db2 ]
The UMR's QP is created by calling mlx5_ib_create_qp directly, and therefore the send CQ and the recv CQ on the ibqp weren't assigned.
Assign them right after calling the mlx5_ib_create_qp to assure that any access to those pointers will work as expected and won't crash the system as might happen as part of reset flow.
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Majd Dibbiny majd@mellanox.com Reviewed-by: Yishai Hadas yishaih@mellanox.com Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 871823cd9825..18645dcf5882 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1099,6 +1099,8 @@ static int create_umr_res(struct mlx5_ib_dev *dev) qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq;
attr->qp_state = IB_QPS_INIT; attr->port_num = 1;
From: Paul Moore paul@paul-moore.com
[ Upstream commit 173743dd99a49c956b124a74c8aacb0384739a4c ]
Prior to this patch we enabled audit in audit_init(), which is too late for PID 1 as the standard initcalls are run after the PID 1 task is forked. This means that we never allocate an audit_context (see audit_alloc()) for PID 1 and therefore miss a lot of audit events generated by PID 1.
This patch enables audit as early as possible to help ensure that when PID 1 is forked it can allocate an audit_context if required.
Reviewed-by: Richard Guy Briggs rgb@redhat.com Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- kernel/audit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c index c6df9905f1c6..e02218c18a81 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -79,13 +79,13 @@ static int audit_initialized; #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 -u32 audit_enabled; -u32 audit_ever_enabled; +u32 audit_enabled = AUDIT_OFF; +u32 audit_ever_enabled = !!AUDIT_OFF;
EXPORT_SYMBOL_GPL(audit_enabled);
/* Default state when kernel boots without any parameters. */ -static u32 audit_default; +static u32 audit_default = AUDIT_OFF;
/* If auditing cannot proceed, audit_failure selects what happens. */ static u32 audit_failure = AUDIT_FAIL_PRINTK; @@ -1173,8 +1173,6 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); audit_initialized = AUDIT_INITIALIZED; - audit_enabled = audit_default; - audit_ever_enabled |= !!audit_default;
audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
@@ -1191,6 +1189,8 @@ static int __init audit_enable(char *str) audit_default = !!simple_strtol(str, NULL, 0); if (!audit_default) audit_initialized = AUDIT_DISABLED; + audit_enabled = audit_default; + audit_ever_enabled = !!audit_enabled;
pr_info("%s\n", audit_default ? "enabled (after initialization)" : "disabled (until reboot)");
linux-stable-mirror@lists.linaro.org