The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 432061b3da64e488be3403124a72a9250bbe96d4 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Wed, 5 Sep 2018 09:17:45 -0400
Subject: [PATCH] dm: disable CRYPTO_TFM_REQ_MAY_SLEEP to fix a GFP_KERNEL
recursion deadlock
There's a XFS on dm-crypt deadlock, recursing back to itself due to the
crypto subsystems use of GFP_KERNEL, reported here:
https://bugzilla.kernel.org/show_bug.cgi?id=200835
* dm-crypt calls crypt_convert in xts mode
* init_crypt from xts.c calls kmalloc(GFP_KERNEL)
* kmalloc(GFP_KERNEL) recurses into the XFS filesystem, the filesystem
tries to submit some bios and wait for them, causing a deadlock
Fix this by updating both the DM crypt and integrity targets to no
longer use the CRYPTO_TFM_REQ_MAY_SLEEP flag, which will change the
crypto allocations from GFP_KERNEL to GFP_ATOMIC, therefore they can't
recurse into a filesystem. A GFP_ATOMIC allocation can fail, but
init_crypt() in xts.c handles the allocation failure gracefully - it
will fall back to preallocated buffer if the allocation fails.
The crypto API maintainer says that the crypto API only needs to
allocate memory when dealing with unaligned buffers and therefore
turning CRYPTO_TFM_REQ_MAY_SLEEP off is safe (see this discussion:
https://www.redhat.com/archives/dm-devel/2018-August/msg00195.html )
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f266c81f396f..0481223b1deb 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -332,7 +332,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
int err;
desc->tfm = essiv->hash_tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ desc->flags = 0;
err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt);
shash_desc_zero(desc);
@@ -606,7 +606,7 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
int i, r;
desc->tfm = lmk->hash_tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ desc->flags = 0;
r = crypto_shash_init(desc);
if (r)
@@ -768,7 +768,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
/* calculate crc32 for every 32bit part and xor it */
desc->tfm = tcw->crc32_tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ desc->flags = 0;
for (i = 0; i < 4; i++) {
r = crypto_shash_init(desc);
if (r)
@@ -1251,7 +1251,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
* requests if driver request queue is full.
*/
skcipher_request_set_callback(ctx->r.req,
- CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));
}
@@ -1268,7 +1268,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
* requests if driver request queue is full.
*/
aead_request_set_callback(ctx->r.req_aead,
- CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));
}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 378878599466..89ccb64342de 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -532,7 +532,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result
unsigned j, size;
desc->tfm = ic->journal_mac;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ desc->flags = 0;
r = crypto_shash_init(desc);
if (unlikely(r)) {
@@ -676,7 +676,7 @@ static void complete_journal_encrypt(struct crypto_async_request *req, int err)
static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
{
int r;
- skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
complete_journal_encrypt, comp);
if (likely(encrypt))
r = crypto_skcipher_encrypt(req);
c2856ae2f315d ("blk-mq: quiesce queue before freeing queue") has
already fixed this race, however the implied synchronize_rcu()
in blk_mq_quiesce_queue() can slow down LUN probe a lot, so caused
performance regression.
Then 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()")
tried to quiesce queue for avoiding unnecessary synchronize_rcu()
only when queue initialization is done, because it is usual to see
lots of inexistent LUNs which need to be probed.
However, turns out it isn't safe to quiesce queue only when queue
initialization is done. Because when one SCSI command is completed,
the user of sending command can be waken up immediately, then the
scsi device may be removed, meantime the run queue in scsi_end_request()
is still in-progress, so kernel panic can be caused.
In Red Hat QE lab, there are several reports about this kind of kernel
panic triggered during kernel booting.
This patch tries to address the issue by grabing one queue usage
counter during freeing one request and the following run queue.
Fixes: 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()")
Cc: Andrew Jones <drjones(a)redhat.com>
Cc: Bart Van Assche <bart.vanassche(a)wdc.com>
Cc: linux-scsi(a)vger.kernel.org
Cc: Martin K. Petersen <martin.petersen(a)oracle.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: James E.J. Bottomley <jejb(a)linux.vnet.ibm.com>
Cc: stable <stable(a)vger.kernel.org>
Cc: jianchao.wang <jianchao.w.wang(a)oracle.com>
Signed-off-by: Ming Lei <ming.lei(a)redhat.com>
---
block/blk-core.c | 5 ++---
drivers/scsi/scsi_lib.c | 8 ++++++++
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index ce12515f9b9b..deb56932f8c4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -798,9 +798,8 @@ void blk_cleanup_queue(struct request_queue *q)
* dispatch may still be in-progress since we dispatch requests
* from more than one contexts.
*
- * No need to quiesce queue if it isn't initialized yet since
- * blk_freeze_queue() should be enough for cases of passthrough
- * request.
+ * We rely on driver to deal with the race in case that queue
+ * initialization isn't done.
*/
if (q->mq_ops && blk_queue_init_done(q))
blk_mq_quiesce_queue(q);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c7fccbb8f554..fa6e0c3b3aa6 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -697,6 +697,12 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
*/
scsi_mq_uninit_cmd(cmd);
+ /*
+ * queue is still alive, so grab the ref for preventing it
+ * from being cleaned up during running queue.
+ */
+ percpu_ref_get(&q->q_usage_counter);
+
__blk_mq_end_request(req, error);
if (scsi_target(sdev)->single_lun ||
@@ -704,6 +710,8 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
kblockd_schedule_work(&sdev->requeue_work);
else
blk_mq_run_hw_queues(q, true);
+
+ percpu_ref_put(&q->q_usage_counter);
} else {
unsigned long flags;
--
2.9.5
On Sat, Jan 26, 2019 at 04:09:45PM +0000, Sasha Levin wrote:
> Hi,
>
> [This is an automated email]
>
> This commit has been processed because it contains a "Fixes:" tag,
> fixing commit: a85f42047533 device property: helper macros for property entry creation.
>
> The bot has tested the following trees: v4.20.4, v4.19.17, v4.14.95, v4.9.152.
>
> v4.20.4: Build OK!
> v4.19.17: Build OK!
> v4.14.95: Failed to apply! Possible dependencies:
> 63dcc7090137 ("device property: Get rid of union aliasing")
> 6e98503dba64 ("efi/apple-properties: Remove redundant attribute initialization from unmarshal_key_value_pairs()")
> b2ca8bdff60c ("device property: Reuse property_entry_free_data()")
> d5f962fa269e ("device property: Move property_entry_free_data() upper")
>
> v4.9.152: Failed to apply! Possible dependencies:
> 027b25b26447 ("ACPI: Add FWNODE_ACPI_STATIC fwnode type")
> 07bb80d40b0e ("device property: Add support for remote endpoints")
> 0f194992c85f ("device property: Fix reading pset strings using array access functions")
> 1df09bc66f9b ("of: Move OF property and graph API from base.c to property.c")
> 21ea73f54c6d ("device property: Add fwnode_get_named_child_node()")
> 2475a2b6c877 ("drivers/of/base.c: Add of_property_read_u64_index")
> 2bd5452d46df ("device property: Add support for fwnode endpoints")
> 34055190b19d ("ACPI / property: Add fwnode_get_next_child_node()")
> 3708184afc77 ("device property: Move FW type specific functionality to FW specific files")
> 39e5aeed835d ("device property: Constify argument to pset fwnode backend")
> 3b27d00e7b6d ("device property: Move fwnode graph ops to firmware specific locations")
> 5fa23530d4fc ("of: base: add support to find the level of the last cache")
> 63dcc7090137 ("device property: Get rid of union aliasing")
> 79389a83bc38 ("ACPI / property: Add support for remote endpoints")
> 99db5ff7fe0b ("ACPI / property: Hierarchical properties support update")
> afaf26fd8458 ("device property: Add fwnode_get_parent()")
> b85ad494098b ("of: introduce of_graph_get_remote_node")
> bec84da8d1da ("device property: allow to constify properties")
> db3e50f3234b ("device property: Get rid of struct fwnode_handle type field")
> dfa672fbc0d9 ("ACPI / property: Add possiblity to retrieve parent firmware node")
> e7887c284969 ("device property: Add fwnode_handle_get()")
>
>
> How should we proceed with this patch?
Can we use the attached separate patches with v4.9 and v4.14 ?
thanks,
--
heikki
This is a follow-up to the discussion in [1], [2].
IOMMUs using ARMv7 short-descriptor format require page tables
(level 1 and 2) to be allocated within the first 4GB of RAM, even
on 64-bit systems.
For L1 tables that are bigger than a page, we can just use __get_free_pages
with GFP_DMA32 (on arm64 systems only, arm would still use GFP_DMA).
For L2 tables that only take 1KB, it would be a waste to allocate a full
page, so we considered 3 approaches:
1. This series, adding support for GFP_DMA32 slab caches.
2. genalloc, which requires pre-allocating the maximum number of L2 page
tables (4096, so 4MB of memory).
3. page_frag, which is not very memory-efficient as it is unable to reuse
freed fragments until the whole page is freed. [3]
This series is the most memory-efficient approach.
stable@ note:
We confirmed that this is a regression, and IOMMU errors happen on 4.19
and linux-next/master on MT8173 (elm, Acer Chromebook R13). The issue
most likely starts from commit ad67f5a6545f ("arm64: replace ZONE_DMA
with ZONE_DMA32"), i.e. 4.15, and presumably breaks a number of Mediatek
platforms (and maybe others?).
[1] https://lists.linuxfoundation.org/pipermail/iommu/2018-November/030876.html
[2] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html
[3] https://patchwork.codeaurora.org/patch/671639/
Changes since v1:
- Add support for SLAB_CACHE_DMA32 in slab and slub (patches 1/2)
- iommu/io-pgtable-arm-v7s (patch 3):
- Changed approach to use SLAB_CACHE_DMA32 added by the previous
commit.
- Use DMA or DMA32 depending on the architecture (DMA for arm,
DMA32 for arm64).
Changes since v2:
- Reworded and expanded commit messages
- Added cache_dma32 documentation in PATCH 2/3.
v3 used the page_frag approach, see [3].
Changes since v4:
- Dropped change that removed GFP_DMA32 from GFP_SLAB_BUG_MASK:
instead we can just call kmem_cache_*alloc without GFP_DMA32
parameter. This also means that we can drop PATCH v4 1/3, as we
do not make any changes in GFP flag verification.
- Dropped hunks that added cache_dma32 sysfs file, and moved
the hunks to PATCH v5 3/3, so that maintainer can decide whether
to pick the change independently.
Changes since v5:
- Rename ARM_V7S_TABLE_SLAB_CACHE to ARM_V7S_TABLE_SLAB_FLAGS.
- Add stable@ to cc.
Nicolas Boichat (3):
mm: Add support for kmem caches in DMA32 zone
iommu/io-pgtable-arm-v7s: Request DMA32 memory, and improve debugging
mm: Add /sys/kernel/slab/cache/cache_dma32
Documentation/ABI/testing/sysfs-kernel-slab | 9 +++++++++
drivers/iommu/io-pgtable-arm-v7s.c | 19 +++++++++++++++----
include/linux/slab.h | 2 ++
mm/slab.c | 2 ++
mm/slab.h | 3 ++-
mm/slab_common.c | 2 +-
mm/slub.c | 16 ++++++++++++++++
tools/vm/slabinfo.c | 7 ++++++-
8 files changed, 53 insertions(+), 7 deletions(-)
--
2.20.0.rc2.403.gdbc3b29805-goog
Commit-ID: 120e4e76857ddbc9268e1aa3f9de61a498e84618
Gitweb: https://git.kernel.org/tip/120e4e76857ddbc9268e1aa3f9de61a498e84618
Author: Kangjie Lu <kjlu(a)umn.edu>
AuthorDate: Wed, 9 Jan 2019 01:45:24 -0600
Committer: Ingo Molnar <mingo(a)kernel.org>
CommitDate: Mon, 21 Jan 2019 11:26:17 +0100
sched/core: Fix a potential double-fetch bug in sched_copy_attr()
"uattr->size" is copied in from user space and checked. However, it is
copied in again after the security check. A malicious user may race to
change it. The fix sets uattr->size to be the checked size.
Signed-off-by: Kangjie Lu <kjlu(a)umn.edu>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: pakki001(a)umn.edu
Cc: <stable(a)vger.kernel.org>
Link: https://lkml.kernel.org/r/20190109074524.10176-1-kjlu@umn.edu
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
---
kernel/sched/core.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db2f29..d4d3514c4fe9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4499,6 +4499,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
if (ret)
return -EFAULT;
+ /* In case attr->size was changed by user-space: */
+ attr->size = size;
+
/*
* XXX: Do we want to be lenient like existing syscalls; or do we want
* to be strict and return an error on out-of-bounds values?
Files can be created and mapped in an explicitly mounted hugetlbfs
filesystem. If pages in such files are migrated, the filesystem
usage will not be decremented for the associated pages. This can
result in mmap or page allocation failures as it appears there are
fewer pages in the filesystem than there should be.
For example, a test program which hole punches, faults and migrates
pages in such a file (1G in size) will eventually fail because it
can not allocate a page. Reported counts and usage at time of failure:
node0
537 free_hugepages
1024 nr_hugepages
0 surplus_hugepages
node1
1000 free_hugepages
1024 nr_hugepages
0 surplus_hugepages
Filesystem Size Used Avail Use% Mounted on
nodev 4.0G 4.0G 0 100% /var/opt/hugepool
Note that the filesystem shows 4G of pages used, while actual usage is
511 pages (just under 1G). Failed trying to allocate page 512.
If a hugetlb page is associated with an explicitly mounted filesystem,
this information in contained in the page_private field. At migration
time, this information is not preserved. To fix, simply transfer
page_private from old to new page at migration time if necessary. Also,
migrate_page_states() unconditionally clears page_private and PagePrivate
of the old page. It is unlikely, but possible that these fields could
be non-NULL and are needed at hugetlb free page time. So, do not touch
these fields for hugetlb pages.
Cc: <stable(a)vger.kernel.org>
Fixes: 290408d4a250 ("hugetlb: hugepage migration core")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
---
fs/hugetlbfs/inode.c | 10 ++++++++++
mm/migrate.c | 10 ++++++++--
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 32920a10100e..fb6de1db8806 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -859,6 +859,16 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
+
+ /*
+ * page_private is subpool pointer in hugetlb pages, transfer
+ * if needed.
+ */
+ if (page_private(page) && !page_private(newpage)) {
+ set_page_private(newpage, page_private(page));
+ set_page_private(page, 0);
+ }
+
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
else
diff --git a/mm/migrate.c b/mm/migrate.c
index f7e4bfdc13b7..0d9708803553 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -703,8 +703,14 @@ void migrate_page_states(struct page *newpage, struct page *page)
*/
if (PageSwapCache(page))
ClearPageSwapCache(page);
- ClearPagePrivate(page);
- set_page_private(page, 0);
+ /*
+ * Unlikely, but PagePrivate and page_private could potentially
+ * contain information needed at hugetlb free page time.
+ */
+ if (!PageHuge(page)) {
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ }
/*
* If any waiters have accumulated on the new page then
--
2.17.2
Take a parent rate of 180 MHz, and a requested rate of 4.285715 MHz.
This results in a theorical divider of 41.999993 which is then rounded
up to 42. The .round_rate function would then return (180 MHz / 42) as
the clock, rounded down, so 4.285714 MHz.
Calling clk_set_rate on 4.285714 MHz would round the rate again, and
give a theorical divider of 42,0000028, now rounded up to 43, and the
rate returned would be (180 MHz / 43) which is 4.186046 MHz, aka. not
what we requested.
Fix this by rounding up the divisions.
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
Tested-by: Maarten ter Huurne <maarten(a)treewalker.org>
Cc: <stable(a)vger.kernel.org>
---
drivers/clk/ingenic/cgu.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c
index 5ef7d9ba2195..b40160eb3372 100644
--- a/drivers/clk/ingenic/cgu.c
+++ b/drivers/clk/ingenic/cgu.c
@@ -426,16 +426,16 @@ ingenic_clk_round_rate(struct clk_hw *hw, unsigned long req_rate,
struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw);
struct ingenic_cgu *cgu = ingenic_clk->cgu;
const struct ingenic_cgu_clk_info *clk_info;
- long rate = *parent_rate;
+ unsigned int div = 1;
clk_info = &cgu->clock_info[ingenic_clk->idx];
if (clk_info->type & CGU_CLK_DIV)
- rate /= ingenic_clk_calc_div(clk_info, *parent_rate, req_rate);
+ div = ingenic_clk_calc_div(clk_info, *parent_rate, req_rate);
else if (clk_info->type & CGU_CLK_FIXDIV)
- rate /= clk_info->fixdiv.div;
+ div = clk_info->fixdiv.div;
- return rate;
+ return DIV_ROUND_UP(*parent_rate, div);
}
static int
@@ -455,7 +455,7 @@ ingenic_clk_set_rate(struct clk_hw *hw, unsigned long req_rate,
if (clk_info->type & CGU_CLK_DIV) {
div = ingenic_clk_calc_div(clk_info, parent_rate, req_rate);
- rate = parent_rate / div;
+ rate = DIV_ROUND_UP(parent_rate, div);
if (rate != req_rate)
return -EINVAL;
--
2.20.1.495.gaa96b0ce6b
This is the start of the stable review cycle for the 4.19.1 release.
There are 24 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun Nov 4 18:28:10 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.1-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.1-rc1
Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com>
net: bridge: remove ipv6 zero address check in mcast queries
David S. Miller <davem(a)davemloft.net>
sparc64: Wire up compat getpeername and getsockname.
David Miller <davem(a)redhat.com>
sparc64: Make corrupted user stacks more debuggable.
David S. Miller <davem(a)davemloft.net>
sparc64: Export __node_distance.
Xin Long <lucien.xin(a)gmail.com>
sctp: check policy more carefully when getting pr status
Ivan Vecera <ivecera(a)redhat.com>
Revert "be2net: remove desc field from be_eq_obj"
Heiner Kallweit <hkallweit1(a)gmail.com>
r8169: fix broken Wake-on-LAN from S5 (poweroff)
David S. Miller <davem(a)davemloft.net>
net: Properly unlink GRO packets on overflow.
Cong Wang <xiyou.wangcong(a)gmail.com>
net: drop skb on failure in ip_check_defrag()
Shalom Toledo <shalomt(a)mellanox.com>
mlxsw: core: Fix devlink unregister flow
Petr Machata <petrm(a)mellanox.com>
mlxsw: spectrum_switchdev: Don't ignore deletions of learned MACs
Karsten Graul <kgraul(a)linux.ibm.com>
net/smc: fix smc_buf_unuse to use the lgr pointer
David Ahern <dsahern(a)gmail.com>
net/ipv6: Allow onlink routes to have a device mismatch if it is the default route
Jaime Caamaño Ruiz <jcaamano(a)suse.com>
openvswitch: Fix push/pop ethernet validation
Tobias Jungel <tobias.jungel(a)gmail.com>
bonding: fix length of actor system
Jason Wang <jasowang(a)redhat.com>
vhost: Fix Spectre V1 vulnerability
Ido Schimmel <idosch(a)mellanox.com>
rtnetlink: Disallow FDB configuration for non-Ethernet device
Karsten Graul <kgraul(a)linux.ibm.com>
Revert "net: simplify sock_poll_wait"
Sean Tranchetti <stranche(a)codeaurora.org>
net: udp: fix handling of CHECKSUM_COMPLETE packets
Niklas Cassel <niklas.cassel(a)linaro.org>
net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules
Jakub Kicinski <jakub.kicinski(a)netronome.com>
net: sched: gred: pass the right attribute to gred_change_table_def()
Eric Dumazet <edumazet(a)google.com>
net/mlx5e: fix csum adjustments caused by RXFCS
Stefano Brivio <sbrivio(a)redhat.com>
ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called
Hangbin Liu <liuhangbin(a)gmail.com>
bridge: do not add port to router list when receives query with source 0.0.0.0
-------------
Diffstat:
Makefile | 4 +-
arch/sparc/include/asm/switch_to_64.h | 3 +-
arch/sparc/kernel/process_64.c | 25 +++++++++---
arch/sparc/kernel/rtrap_64.S | 1 +
arch/sparc/kernel/signal32.c | 12 +++++-
arch/sparc/kernel/signal_64.c | 6 ++-
arch/sparc/kernel/systbls_64.S | 4 +-
arch/sparc/mm/init_64.c | 1 +
crypto/af_alg.c | 2 +-
drivers/net/bonding/bond_netlink.c | 3 +-
drivers/net/ethernet/emulex/benet/be.h | 1 +
drivers/net/ethernet/emulex/benet/be_main.c | 6 +--
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 45 +++++-----------------
drivers/net/ethernet/mellanox/mlxsw/core.c | 24 ++++++++----
.../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 -
drivers/net/ethernet/realtek/r8169.c | 9 ++++-
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +-
drivers/vhost/vhost.c | 2 +
include/net/sock.h | 12 ++++--
net/atm/common.c | 2 +-
net/bridge/br_multicast.c | 9 ++++-
net/caif/caif_socket.c | 2 +-
net/core/datagram.c | 7 ++--
net/core/dev.c | 1 +
net/core/rtnetlink.c | 10 +++++
net/dccp/proto.c | 2 +-
net/ipv4/ip_fragment.c | 12 ++++--
net/ipv4/tcp.c | 2 +-
net/ipv4/udp.c | 20 +++++++++-
net/ipv6/ip6_checksum.c | 20 +++++++++-
net/ipv6/ndisc.c | 3 +-
net/ipv6/route.c | 2 +
net/iucv/af_iucv.c | 2 +-
net/nfc/llcp_sock.c | 2 +-
net/openvswitch/flow_netlink.c | 4 +-
net/rxrpc/af_rxrpc.c | 2 +-
net/sched/sch_gred.c | 2 +-
net/sctp/socket.c | 8 ++--
net/smc/af_smc.c | 2 +-
net/smc/smc_core.c | 25 ++++++------
net/tipc/socket.c | 2 +-
net/unix/af_unix.c | 4 +-
tools/testing/selftests/net/fib-onlink-tests.sh | 14 +++----
43 files changed, 200 insertions(+), 123 deletions(-)
The mce handler for 'nfit' devices is called for memory errors on a
Non-Volatile DIMM, and adds the error location to a 'badblocks' list.
This list is used by the various NVDIMM drivers to avoid consuming known
poison locations during IO.
The mce handler gets called for both corrected and uncorrectable errors.
Until now, both kinds of errors have been added to the badblocks list.
However, corrected memory errors indicate that the problem has already
been fixed by hardware, and the resulting interrupt is merely a
notification to Linux. As far as future accesses to that location are
concerned, it is perfectly fine to use, and thus doesn't need to be
included in the above badblocks list.
Add a check in the nfit mce handler to filter out corrected mce events,
and only process uncorrectable errors.
Reported-by: Omar Avelar <omar.avelar(a)intel.com>
Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
Cc: stable(a)vger.kernel.org
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Tony Luck <tony.luck(a)intel.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
arch/x86/include/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 3 ++-
drivers/acpi/nfit/mce.c | 4 ++--
3 files changed, 5 insertions(+), 3 deletions(-)
v3: Unchanged from v2
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3a17107594c8..3111b3cee2ee 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -216,6 +216,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s
int mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
+bool mce_is_correctable(struct mce *m);
DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 953b3ce92dcc..27015948bc41 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -534,7 +534,7 @@ bool mce_is_memory_error(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
-static bool mce_is_correctable(struct mce *m)
+bool mce_is_correctable(struct mce *m)
{
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
return false;
@@ -544,6 +544,7 @@ static bool mce_is_correctable(struct mce *m)
return true;
}
+EXPORT_SYMBOL_GPL(mce_is_correctable);
static bool cec_add_mce(struct mce *m)
{
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index e9626bf6ca29..7a51707f87e9 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -25,8 +25,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
struct acpi_nfit_desc *acpi_desc;
struct nfit_spa *nfit_spa;
- /* We only care about memory errors */
- if (!mce_is_memory_error(mce))
+ /* We only care about uncorrectable memory errors */
+ if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
return NOTIFY_DONE;
/*
--
2.17.1