commit 0962590e553331db2cc0aef2dc35c57f6300dbbe upstream.
ALU operations on pointers such as scalar_reg += map_value_ptr are
handled in adjust_ptr_min_max_vals(). The problem, however, is that
map_ptr and range in the register state share a union, so transferring
state through dst_reg->range = ptr_reg->range is buggy: any new map_ptr
in the dst_reg is then truncated (or NULL) for subsequent checks. Fix
this by adding a raw member and using it to copy state over to dst_reg.
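For illustration, a condensed view of the aliasing members (as they appear in
struct bpf_reg_state in include/linux/bpf_verifier.h; the real struct carries
more state):

	union {
		/* valid when type == PTR_TO_PACKET */
		u16 range;
		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
		 *   PTR_TO_MAP_VALUE_OR_NULL
		 */
		struct bpf_map *map_ptr;
		/* Max size from any of the above. */
		unsigned long raw;
	};

Since range and map_ptr occupy the same storage, copying only the 16-bit range
member transfers just a fraction of a map_ptr held there, leaving dst_reg with
a truncated (or NULL) map_ptr, whereas copying the full-width raw member
preserves whichever interpretation applies.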
Fixes: f1174f77b50c ("bpf/verifier: rework value tracking")
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Cc: Edward Cree <ecree(a)solarflare.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Edward Cree <ecree(a)solarflare.com>
---
include/linux/bpf_verifier.h | 3 +++
kernel/bpf/verifier.c | 10 ++++++----
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 73bec75..a333300 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -50,6 +50,9 @@ struct bpf_reg_state {
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
+
+ /* Max size from any of the above. */
+ unsigned long raw;
};
/* Fixed part of pointer offset, pointer types only */
s32 off;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a0ffc62..013b0cd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1935,7 +1935,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->umax_value = umax_ptr;
dst_reg->var_off = ptr_reg->var_off;
dst_reg->off = ptr_reg->off + smin_val;
- dst_reg->range = ptr_reg->range;
+ dst_reg->raw = ptr_reg->raw;
break;
}
/* A new variable offset is created. Note that off_reg->off
@@ -1965,10 +1965,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
dst_reg->off = ptr_reg->off;
+ dst_reg->raw = ptr_reg->raw;
if (ptr_reg->type == PTR_TO_PACKET) {
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
- dst_reg->range = 0;
+ dst_reg->raw = 0;
}
break;
case BPF_SUB:
@@ -1999,7 +2000,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->var_off = ptr_reg->var_off;
dst_reg->id = ptr_reg->id;
dst_reg->off = ptr_reg->off - smin_val;
- dst_reg->range = ptr_reg->range;
+ dst_reg->raw = ptr_reg->raw;
break;
}
/* A new variable offset is created. If the subtrahend is known
@@ -2025,11 +2026,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
dst_reg->off = ptr_reg->off;
+ dst_reg->raw = ptr_reg->raw;
if (ptr_reg->type == PTR_TO_PACKET) {
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
if (smin_val < 0)
- dst_reg->range = 0;
+ dst_reg->raw = 0;
}
break;
case BPF_AND:
--
2.9.5
From: Ivan Khoronzhuk <ivan.khoronzhuk(a)linaro.org>
[ Upstream commit 9737cc99dd14b5b8b9d267618a6061feade8ea68 ]
After flushing all mcast entries from the table, the ones contained in
the mc list of ndev are not restored when promisc mode is toggled off,
because they are considered already synced with the ALE. So, in order to
restore them after promisc mode, reset the syncing info. This fix
touches only switch mode devices, including single port boards
like the BeagleBone.
Fixes: 5da1948969bc ("net: ethernet: ti: cpsw: fix lost of mcast packets while rx_mode update")
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk(a)linaro.org>
Reviewed-by: Grygorii Strashko <grygorii.strashko(a)ti.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/ethernet/ti/cpsw.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 8cb44eabc283..a44838aac97d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -601,6 +601,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
/* Clear all mcast from ALE */
cpsw_ale_flush_multicast(ale, ALE_ALL_PORTS, -1);
+ __dev_mc_unsync(ndev, NULL);
/* Flood All Unicast Packets to Host port */
cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1);
--
2.17.1
UBIFS's recovery code strictly assumes that a deleted inode will never
come back; therefore it removes all data which belongs to that inode
as soon as it encounters an inode with link count 0 in the replay list.
Before O_TMPFILE this assumption was perfectly fine. With O_TMPFILE
it can lead to data loss upon a power cut.
Consider a journal with entries like:
0: inode X (nlink = 0) /* O_TMPFILE was created */
1: data for inode X /* Someone writes to the temp file */
2: inode X (nlink = 0) /* inode was changed, xattr, chmod, … */
3: inode X (nlink = 1) /* inode was re-linked via linkat() */
Upon replay of entry #2 UBIFS will drop all data that belongs to inode X;
this will lead to an empty file after mounting.
As a solution for this problem, scan the replay list for a re-link entry
before dropping data.
Fixes: 474b93704f32 ("ubifs: Implement O_TMPFILE")
Cc: stable(a)vger.kernel.org
Reported-by: Russell Senior <russell(a)personaltelco.net>
Reported-by: Rafał Miłecki <zajec5(a)gmail.com>
Signed-off-by: Richard Weinberger <richard(a)nod.at>
---
Russel, Rafał,
please give this patch another testing.
I'll also run it on different test systems before merging.
Thanks,
//richard
---
fs/ubifs/replay.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 4844538eb926..65a780685b82 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -209,6 +209,34 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
return ubifs_tnc_remove_range(c, &min_key, &max_key);
}
+/**
+ * inode_relinked - check whether inode in question will be re-linked.
+ * @c: UBIFS file-system description object
+ * @rino: replay entry to test
+ *
+ * O_TMPFILE files can be re-linked; this means the link count goes from 0 to 1.
+ * This case needs special care, otherwise all references to the inode will
+ * be removed as soon as the first replay entry with link count 0 for that
+ * inode is found.
+ */
+static bool inode_relinked(struct ubifs_info *c, struct replay_entry *rino)
+{
+ struct replay_entry *r = rino;
+
+ ubifs_assert(c, rino->deletion);
+ ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY);
+
+ list_for_each_entry_from(r, &c->replay_list, list) {
+ if (key_inum(c, &r->key) == key_inum(c, &rino->key) &&
+ r->deletion == 0) {
+ ubifs_assert(c, r->sqnum > rino->sqnum);
+ return true;
+ }
+ }
+
+ return false;
+}
+
/**
* apply_replay_entry - apply a replay entry to the TNC.
* @c: UBIFS file-system description object
@@ -236,6 +264,11 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
{
ino_t inum = key_inum(c, &r->key);
+ if (inode_relinked(c, r)) {
+ err = 0;
+ break;
+ }
+
err = ubifs_tnc_remove_ino(c, inum);
break;
}
--
2.19.1
Commit e259221763a40403d5bb232209998e8c45804ab8 ("fs: simplify the
generic_write_sync prototype") reworked callers of generic_write_sync()
and ended up dropping the error return for the direct I/O path. Prior to
that commit, in dio_complete(), an error would be bubbled up the stack,
but after that commit, errors passed on to dio_complete() were swallowed.
This was reported on the list earlier, and a fix was proposed in
https://lore.kernel.org/lkml/20160921141539.GA17898@infradead.org/, but
never followed up on. We recently hit this bug in our testing, where
fencing I/O errors, which previously erred out with EIO, were being
returned as successful operations after this commit.
The fix proposed on the list earlier fell a little short -- it would have
still called generic_write_sync() even if `ret` already contained an
error. This fix ensures generic_write_sync() is only called when there
is no pending error in the write.
CC: stable(a)vger.kernel.org
Reported-by: Ravi Nankani <rnankani(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
Signed-off-by: Torsten Mehlan <tomeh(a)amazon.de>
Signed-off-by: Uwe Dannowski <uwed(a)amazon.de>
Signed-off-by: Amit Shah <aams(a)amazon.de>
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
---
fs/direct-io.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 093fb54cd316..199146036093 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -325,8 +325,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
*/
dio->iocb->ki_pos += transferred;
- if (dio->op == REQ_OP_WRITE)
- ret = generic_write_sync(dio->iocb, transferred);
+ if (ret > 0 && dio->op == REQ_OP_WRITE)
+ ret = generic_write_sync(dio->iocb, ret);
dio->iocb->ki_complete(dio->iocb, ret, 0);
}
--
2.16.2
PageTransCompoundMap() returns true for hugetlbfs and THP
hugepages. This behaviour incorrectly causes stage 2 faults for
unsupported hugepage sizes (e.g., a 64K hugepage with 4K pages) to be
treated as THP faults.
Tighten the check to filter out hugetlbfs pages. This also leads to
consistently mapping all unsupported hugepage sizes as PTE level
entries at stage 2.
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Cc: Christoffer Dall <christoffer.dall(a)arm.com>
Cc: Marc Zyngier <marc.zyngier(a)arm.com>
Cc: stable(a)vger.kernel.org # v4.13+
---
virt/kvm/arm/mmu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 7e477b3cae5b..c23a1b323aad 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1231,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *ipap >> PAGE_SHIFT;
+ struct page *page = pfn_to_page(pfn);
- if (PageTransCompoundMap(pfn_to_page(pfn))) {
+ /*
+ * PageTransCompoundMap() returns true for THP and
+ * hugetlbfs. Make sure the adjustment is done only for THP
+ * pages.
+ */
+ if (!PageHuge(page) && PageTransCompoundMap(page)) {
unsigned long mask;
/*
* The address we faulted on is backed by a transparent huge
--
2.18.0
From: Waiman Long <longman(a)redhat.com>
[ Upstream commit 9506a7425b094d2f1d9c877ed5a78f416669269b ]
It was found that when debug_locks was turned off because of a problem
found by the lockdep code, the system performance could drop quite
significantly when the lock_stat code was also configured into the
kernel. For instance, parallel kernel build time on a 4-socket x86-64
server nearly doubled.
Further analysis traced the slowdown back to frequent calls to
debug_locks_off() from the __lock_acquired() function, probably due to
some inconsistent lockdep state with debug_locks off. The
debug_locks_off() function did an unconditional atomic xchg to write a
0 value into debug_locks, which had already been set to 0. This led to
severe contention on the cacheline that held debug_locks. As
debug_locks is referenced in quite a few different places in the
kernel, this greatly slowed down the system.
To prevent that thrashing of the debug_locks cacheline, lock_acquired()
and lock_contended() now check the state of debug_locks before
proceeding. The debug_locks_off() function is also modified to check
debug_locks before calling __debug_locks_off().
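For context, __debug_locks_off() boils down to an unconditional atomic exchange
(a simplified sketch of the helper in include/linux/debug_locks.h):

	static inline int __debug_locks_off(void)
	{
		return xchg(&debug_locks, 0);	/* always writes the cacheline */
	}

so every caller that reached it kept dirtying the debug_locks cacheline even
though the value was already 0. Reading debug_locks first turns those calls
into shared, read-only accesses of that cacheline.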
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Paul E. McKenney <paulmck(a)linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Will Deacon <will.deacon(a)arm.com>
Link: http://lkml.kernel.org/r/1539913518-15598-1-git-send-email-longman@redhat.c…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/locking/lockdep.c | 4 ++--
lib/debug_locks.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index f99008534275..fb90ca3a296e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3808,7 +3808,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
@@ -3828,7 +3828,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 96c4c633d95e..124fdf238b3d 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
*/
int debug_locks_off(void)
{
- if (__debug_locks_off()) {
+ if (debug_locks && __debug_locks_off()) {
if (!debug_locks_silent) {
console_verbose();
return 1;
--
2.17.1
From: Masahisa Kojima <masahisa.kojima(a)linaro.org>
[ Upstream commit 8d5b0bf611ec5b7618d5b772dddc93b8afa78cb8 ]
We observed that the packet and byte counts are not reset
when the user brings the interface down. Eventually, the tx queue is
exhausted and packets will not be sent out.
To avoid this problem, reset the tx queue in ndo_stop.
Fixes: 533dd11a12f6 ("net: socionext: Add Synquacer NetSec driver")
Signed-off-by: Masahisa Kojima <masahisa.kojima(a)linaro.org>
Signed-off-by: Yoshitoyo Osaki <osaki.yoshitoyo(a)socionext.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/ethernet/socionext/netsec.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index e080d3e7c582..4d7d53fbc0ef 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -945,6 +945,9 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
dring->head = 0;
dring->tail = 0;
dring->pkt_cnt = 0;
+
+ if (id == NETSEC_RING_TX)
+ netdev_reset_queue(priv->ndev);
}
static void netsec_free_dring(struct netsec_priv *priv, int id)
--
2.17.1
We need to make sure that the carrier check polling is disabled
while suspending. Otherwise we can end up with usbnet_read_cmd()
being issued when only usbnet_read_cmd_nopm() is allowed. If this
happens, read operations lock up.
Fixes: d69d169493 ("usbnet: smsc95xx: fix link detection for disabled autonegotiation")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Frieder Schrempf <frieder.schrempf(a)kontron.de>
---
drivers/net/usb/smsc95xx.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 262e7a3..3bc9633 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1592,6 +1592,8 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ cancel_delayed_work_sync(&pdata->carrier_check);
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
@@ -1840,6 +1842,11 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
*/
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+
+ if (ret)
+ schedule_delayed_work(&pdata->carrier_check,
+ CARRIER_CHECK_DELAY);
+
return ret;
}
--
2.7.4
The patch titled
Subject: memory_hotplug: cond_resched in __remove_pages
has been added to the -mm tree. Its filename is
memory_hotplug-cond_resched-in-__remove_pages.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/memory_hotplug-cond_resched-in-__r…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/memory_hotplug-cond_resched-in-__r…
------------------------------------------------------
From: Michal Hocko <mhocko(a)suse.com>
Subject: memory_hotplug: cond_resched in __remove_pages
We have received a bug report that unbinding a large pmem (>1TB) can
result in a soft lockup:
[ 380.339203] NMI watchdog: BUG: soft lockup - CPU#9 stuck for 23s! [ndctl:4365]
[...]
[ 380.339316] Supported: Yes
[ 380.339318] CPU: 9 PID: 4365 Comm: ndctl Not tainted 4.12.14-94.40-default #1 SLE12-SP4
[ 380.339318] Hardware name: Intel Corporation S2600WFD/S2600WFD, BIOS SE5C620.86B.01.00.0833.051120182255 05/11/2018
[ 380.339319] task: ffff9cce7d4410c0 task.stack: ffffbe9eb1bc4000
[ 380.339325] RIP: 0010:__put_page+0x62/0x80
[ 380.339326] RSP: 0018:ffffbe9eb1bc7d30 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10
[ 380.339327] RAX: 000040540081c0d3 RBX: ffffeb8f03557200 RCX: 000063af40000000
[ 380.339328] RDX: 0000000000000002 RSI: ffff9cce75bff498 RDI: ffff9e4a76072ff8
[ 380.339329] RBP: 0000000a43557200 R08: 0000000000000000 R09: ffffbe9eb1bc7bb0
[ 380.339329] R10: ffffbe9eb1bc7d08 R11: 0000000000000000 R12: ffff9e194a22a0e0
[ 380.339330] R13: ffff9cce7062fc10 R14: ffff9e194a22a0a0 R15: ffff9cce6559c0e0
[ 380.339331] FS: 00007fd132368880(0000) GS:ffff9cce7ea40000(0000) knlGS:0000000000000000
[ 380.339332] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 380.339332] CR2: 00000000020820a0 CR3: 000000017ef7a003 CR4: 00000000007606e0
[ 380.339333] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 380.339334] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 380.339334] PKRU: 55555554
[ 380.339334] Call Trace:
[ 380.339338] devm_memremap_pages_release+0x152/0x260
[ 380.339342] release_nodes+0x18d/0x1d0
[ 380.339347] device_release_driver_internal+0x160/0x210
[ 380.339350] unbind_store+0xb3/0xe0
[ 380.339355] kernfs_fop_write+0x102/0x180
[ 380.339358] __vfs_write+0x26/0x150
[ 380.339363] ? security_file_permission+0x3c/0xc0
[ 380.339364] vfs_write+0xad/0x1a0
[ 380.339366] SyS_write+0x42/0x90
[ 380.339370] do_syscall_64+0x74/0x150
[ 380.339375] entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[ 380.339377] RIP: 0033:0x7fd13166b3d0
This has been reported on an older (4.12) kernel, but the current upstream
code doesn't cond_resched() in the hot-remove code at all, and the given
range to remove might be really large. Fix the issue by calling
cond_resched() once per memory section.
Link: http://lkml.kernel.org/r/20181031125840.23982-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Johannes Thumshirn <jthumshirn(a)suse.de>
Cc: Dan Williams <dan.j.williams(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/memory_hotplug.c~memory_hotplug-cond_resched-in-__remove_pages
+++ a/mm/memory_hotplug.c
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, un
for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ cond_resched();
ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
altmap);
map_offset = 0;
_
Patches currently in -mm which might be from mhocko(a)suse.com are
memory_hotplug-cond_resched-in-__remove_pages.patch
mm-thp-consolidate-thp-gfp-handling-into-alloc_hugepage_direct_gfpmask.patch
The patch titled
Subject: kbuild: fix kernel/bounds.c 'W=1' warning
has been removed from the -mm tree. Its filename was
kbuild-fix-kernel-boundsc-w=1-warning.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Arnd Bergmann <arnd(a)arndb.de>
Subject: kbuild: fix kernel/bounds.c 'W=1' warning
Building any configuration with 'make W=1' produces a warning:
kernel/bounds.c:16:6: warning: no previous prototype for 'foo' [-Wmissing-prototypes]
When also passing -Werror, this prevents us from building any other files.
Nobody ever calls the function, but we can't make it 'static' either
since we want the compiler output.
Calling it 'main' instead, however, avoids the warning, because gcc
does not insist on having a declaration for main.
Link: http://lkml.kernel.org/r/20181005083313.2088252-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Reported-by: Kieran Bingham <kieran.bingham+renesas(a)ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas(a)ideasonboard.com>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: Masahiro Yamada <yamada.masahiro(a)socionext.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/bounds.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/kernel/bounds.c~kbuild-fix-kernel-boundsc-w=1-warning
+++ a/kernel/bounds.c
@@ -13,7 +13,7 @@
#include <linux/log2.h>
#include <linux/spinlock_types.h>
-void foo(void)
+int main(void)
{
/* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
@@ -23,4 +23,6 @@ void foo(void)
#endif
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
/* End of constants */
+
+ return 0;
}
_
Patches currently in -mm which might be from arnd(a)arndb.de are
ocfs2-dlmglue-clean-up-timestamp-handling.patch
vfs-replace-current_kernel_time64-with-ktime-equivalent.patch
The patch titled
Subject: mm/hmm: fix race between hmm_mirror_unregister() and mmu_notifier callback
has been removed from the -mm tree. Its filename was
mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/hmm: fix race between hmm_mirror_unregister() and mmu_notifier callback
In hmm_mirror_unregister(), mm->hmm is set to NULL and then
mmu_notifier_unregister_no_release() is called. That creates a small
window where mmu_notifier can call mmu_notifier_ops with mm->hmm equal to
NULL. Fix this by first unregistering mmu notifier callbacks and then
setting mm->hmm to NULL.
Similarly in hmm_register(), set mm->hmm before registering mmu_notifier
callbacks so callback functions always see mm->hmm set.
Link: http://lkml.kernel.org/r/20181019160442.18723-4-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hmm.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
--- a/mm/hmm.c~mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback
+++ a/mm/hmm.c
@@ -91,16 +91,6 @@ static struct hmm *hmm_register(struct m
spin_lock_init(&hmm->lock);
hmm->mm = mm;
- /*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
- */
- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
- kfree(hmm);
- return NULL;
- }
-
spin_lock(&mm->page_table_lock);
if (!mm->hmm)
mm->hmm = hmm;
@@ -108,12 +98,27 @@ static struct hmm *hmm_register(struct m
cleanup = true;
spin_unlock(&mm->page_table_lock);
- if (cleanup) {
- mmu_notifier_unregister(&hmm->mmu_notifier, mm);
- kfree(hmm);
- }
+ if (cleanup)
+ goto error;
+
+ /*
+ * We should only get here if hold the mmap_sem in write mode ie on
+ * registration of first mirror through hmm_mirror_register()
+ */
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
+ goto error_mm;
return mm->hmm;
+
+error_mm:
+ spin_lock(&mm->page_table_lock);
+ if (mm->hmm == hmm)
+ mm->hmm = NULL;
+ spin_unlock(&mm->page_table_lock);
+error:
+ kfree(hmm);
+ return NULL;
}
void hmm_mm_destroy(struct mm_struct *mm)
@@ -278,12 +283,13 @@ void hmm_mirror_unregister(struct hmm_mi
if (!should_unregister || mm == NULL)
return;
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
spin_lock(&mm->page_table_lock);
if (mm->hmm == hmm)
mm->hmm = NULL;
spin_unlock(&mm->page_table_lock);
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
kfree(hmm);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
The patch titled
Subject: mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly
has been removed from the -mm tree. Its filename was
mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly
Private ZONE_DEVICE pages use a special pte entry and thus are not
present. Properly handle this case in map_pte(); it is already handled in
check_pte(), and the map_pte() part was most probably lost in some rebase.
Without this patch the slow migration path cannot migrate private
ZONE_DEVICE memory back to regular memory. This was found after stress
testing migration back to system memory. It can ultimately lead to the
CPU constantly page faulting in a loop on the special swap entry.
Link: http://lkml.kernel.org/r/20181019160442.18723-3-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_vma_mapped.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
--- a/mm/page_vma_mapped.c~mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3
+++ a/mm/page_vma_mapped.c
@@ -21,7 +21,29 @@ static bool map_pte(struct page_vma_mapp
if (!is_swap_pte(*pvmw->pte))
return false;
} else {
- if (!pte_present(*pvmw->pte))
+ /*
+ * We get here when we are trying to unmap a private
+ * device page from the process address space. Such
+ * page is not CPU accessible and thus is mapped as
+ * a special swap entry, nonetheless it still does
+ * count as a valid regular mapping for the page (and
+ * is accounted as such in page maps count).
+ *
+ * So handle this special case as if it was a normal
+ * page mapping ie lock CPU page table and returns
+ * true.
+ *
+ * For more details on device private memory see HMM
+ * (include/linux/hmm.h or mm/hmm.c).
+ */
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_device_private_entry(entry))
+ return false;
+ } else if (!pte_present(*pvmw->pte))
return false;
}
}
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
From: "David S. Miller" <davem(a)davemloft.net>
When processing using 'perf report -g caller', which is the default, we
ended up reversing the callchain entries received from the kernel, but
simply reversing them throws away the information that tells us that,
from a point onwards, the addresses are for userspace, kernel, guest
kernel, guest user or hypervisor.
The idea is that if we are walking backwards, for each cluster of
non-cpumode entries we first have to scan backwards for the next cpumode
entry and use it for that cluster.
This seems silly and more expensive than it needs to be, but it is enough
for an initial fix.
The code here is really complicated because it is intimately intertwined
with the lbr and branch handling, as well as this callchain order,
further fixes will be needed to properly take into account the cpumode
in those cases.
Another problem with ORDER_CALLER is that the NULL "0" IP that is at the
end of most callchains shows up at the top of the histogram because
every callchain contains it and with ORDER_CALLER it is the first entry.
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Tested-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Cc: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: David Ahern <dsahern(a)gmail.com>
Cc: Jiri Olsa <jolsa(a)kernel.org>
Cc: Namhyung Kim <namhyung(a)kernel.org>
Cc: Souvik Banerjee <souvik1997(a)gmail.com>
Cc: Wang Nan <wangnan0(a)huawei.com>
Cc: stable(a)vger.kernel.org # 4.19
Link: https://lkml.kernel.org/n/tip-2wt3ayp6j2y2f2xowixa8y6y@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
---
tools/perf/util/machine.c | 35 ++++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae858cbcb..8ee8ab39d8ac 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2140,6 +2140,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
return 0;
}
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode, int ent)
+{
+ int err = 0;
+
+ while (--ent >= 0) {
+ u64 ip = chain->ips[ent];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, cpumode, ip,
+ false, NULL, NULL, 0);
+ break;
+ }
+ }
+ return err;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct perf_evsel *evsel,
@@ -2246,6 +2267,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
}
check_calls:
+ if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+ &cpumode, chain->nr - first_call);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
for (i = first_call, nr_entries = 0;
i < chain_nr && nr_entries < max_stack; i++) {
u64 ip;
@@ -2260,9 +2287,15 @@ static int thread__resolve_callchain_sample(struct thread *thread,
continue;
#endif
ip = chain->ips[j];
-
if (ip < PERF_CONTEXT_MAX)
++nr_entries;
+ else if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent,
+ root_al, &cpumode, j);
+ if (err)
+ return (err < 0) ? err : 0;
+ continue;
+ }
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
--
2.14.4
Initially we bumped into a problem with a 32-bit aligned atomic64_t
on ARC, see [1]. Then, during a quite lengthy discussion, Peter Z.
mentioned ARCH_KMALLOC_MINALIGN, which IMHO makes perfect sense.
If the allocation is done by plain kmalloc(), the obtained buffer will be
ARCH_KMALLOC_MINALIGN aligned, so why should a buffer obtained via
devm_kmalloc() have any other alignment?
This way we at least get the same behavior for both types of
allocation.
[1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004009.html
[2] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004036.html
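As a hypothetical illustration of the report in [1] (the struct and variable
names below are made up), an atomic64_t embedded in a device-managed
allocation needs the same alignment guarantee that plain kmalloc() provides:

	struct foo_stats {
		atomic64_t rx_bytes;	/* needs natural 64-bit alignment */
	};

	/* dev is assumed to be the owning struct device */
	struct foo_stats *stats = devm_kmalloc(dev, sizeof(*stats), GFP_KERNEL);

With this patch the returned buffer is ARCH_KMALLOC_MINALIGN aligned, exactly
as a plain kmalloc() buffer would be, so both allocation paths give the same
guarantee.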
Signed-off-by: Alexey Brodkin <abrodkin(a)synopsys.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Geert Uytterhoeven <geert(a)linux-m68k.org>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Vineet Gupta <vgupta(a)synopsys.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: Greg KH <greg(a)kroah.com>
Cc: <stable(a)vger.kernel.org> # 4.8+
---
Changes v3 -> v4:
* Use ARCH_KMALLOC_MINALIGN for alignment instead of "8" [Peter]
Changes v2 -> v3:
* Align explicitly to 8 bytes [David]
* Rephrased in-line comment [David]
* Added more technical details to commit message [Greg]
* Mention more alignment options in commit message [Geert]
Changes v1 -> v2:
* Reworded commit message
* Inserted comment right in source [Thomas]
drivers/base/devres.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 4aaf00d2098b..e038e2b3b7ea 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -26,8 +26,14 @@ struct devres_node {
struct devres {
struct devres_node node;
- /* -- 3 pointers */
- unsigned long long data[]; /* guarantee ull alignment */
+ /*
+ * Some archs want to perform DMA into kmalloc caches
+ * and need a guaranteed alignment larger than
+ * the alignment of a 64-bit integer.
+ * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
+ * buffer alignment as if it was allocated by plain kmalloc().
+ */
+ u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
};
struct devres_group {
--
2.17.2
Hi Greg and other -stable maintainers
Please consider adding da15fc2fa9c (perf tools: Disable parallelism for
'make clean') to your -stable trees. Spurious build failures like that
make it harder to do automatic tests of new -stable (and -stable-rc)
versions.
Thanks,
Rasmus
Hi,
please consider reverting
commit 84379c9afe011020e797e3f50a662b08a6355dcf
netfilter: ipv6: nf_defrag: drop skb dst before queueing
It causes a kernel crash for locally generated IPv6 fragments
when netfilter IPv6 defragmentation is used.
The faulty commit is not essential for -stable; it only
delays netns teardown for longer than needed when that netns
still has IPv6 frags queued. Much better than a crash :-/
commit ids are:
4.4.y: not affected (not backported)
4.9.y: backported as ad8b1ffc3efae2f65080bdb11145c87d299b8f9a
4.14.y: backported as 28c74ff85efd192aeca9005499ca50c24d795f61
4.18.y (first affected kernel): 84379c9afe011020e797e3f50a662b08a6355dcf
For 4.19.y, you could also wait for a bug fix to hit Linus' tree;
I can ping you again once it's in:
https://patchwork.ozlabs.org/patch/988233/
Thanks,
Florian