This is a note to let you know that I've just added the patch titled
ipv4: igmp: guard against silly MTU values
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ipv4-igmp-guard-against-silly-mtu-values.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:12:48 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 11 Dec 2017 07:17:39 -0800
Subject: ipv4: igmp: guard against silly MTU values
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit b5476022bbada3764609368f03329ca287528dc8 ]
IPv4 stack reacts to changes to small MTU, by disabling itself under
RTNL.
But there is a window where threads not using RTNL can see a wrong
device mtu. This can lead to surprises, in igmp code where it is
assumed the mtu is suitable.
Fix this by reading device mtu once and checking IPv4 minimal MTU.
This patch adds missing IPV4_MIN_MTU define, to not abuse
ETH_MIN_MTU anymore.
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/net/ip.h | 1 +
net/ipv4/devinet.c | 2 +-
net/ipv4/igmp.c | 24 +++++++++++++++---------
net/ipv4/ip_tunnel.c | 4 ++--
4 files changed, 19 insertions(+), 12 deletions(-)
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -34,6 +34,7 @@
#include <net/flow_dissector.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
+#define IPV4_MIN_MTU 68 /* RFC 791 */
struct sock;
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1420,7 +1420,7 @@ skip:
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -404,16 +404,17 @@ static int grec_size(struct ip_mc_list *
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -436,12 +437,17 @@ static struct sk_buff *add_grec(struct s
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +468,7 @@ static struct sk_buff *add_grec(struct s
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -498,12 +504,12 @@ static struct sk_buff *add_grec(struct s
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -538,7 +544,7 @@ empty_source:
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-4.14/tcp-refresh-tcp_mstamp-from-timers-callbacks.patch
queue-4.14/net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch
queue-4.14/sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch
queue-4.14/tcp-invalidate-rate-samples-during-sack-reneging.patch
queue-4.14/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch
queue-4.14/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch
queue-4.14/ipv4-igmp-guard-against-silly-mtu-values.patch
queue-4.14/tcp-fix-potential-underestimation-on-rcv_rtt.patch
queue-4.14/ipv6-mcast-better-catch-silly-mtu-values.patch
This is a note to let you know that I've just added the patch titled
ipv4: Fix use-after-free when flushing FIB tables
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ipv4-fix-use-after-free-when-flushing-fib-tables.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:12:48 CET 2017
From: Ido Schimmel <idosch(a)mellanox.com>
Date: Wed, 20 Dec 2017 19:34:19 +0200
Subject: ipv4: Fix use-after-free when flushing FIB tables
From: Ido Schimmel <idosch(a)mellanox.com>
[ Upstream commit b4681c2829e24943aadd1a7bb3a30d41d0a20050 ]
Since commit 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") the
local table uses the same trie allocated for the main table when custom
rules are not in use.
When a net namespace is dismantled, the main table is flushed and freed
(via an RCU callback) before the local table. In case the callback is
invoked before the local table is iterated, a use-after-free can occur.
Fix this by iterating over the FIB tables in reverse order, so that the
main table is always freed after the local table.
v3: Reworded comment according to Alex's suggestion.
v2: Add a comment to make the fix more explicit per Dave's and Alex's
feedback.
Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse")
Signed-off-by: Ido Schimmel <idosch(a)mellanox.com>
Reported-by: Fengguang Wu <fengguang.wu(a)intel.com>
Acked-by: Alexander Duyck <alexander.h.duyck(a)intel.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/fib_frontend.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1274,14 +1274,19 @@ err_table_hash_alloc:
static void ip_fib_net_exit(struct net *net)
{
- unsigned int i;
+ int i;
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+ /* Destroy the tables in reverse order to guarantee that the
+ * local table, ID 255, is destroyed before the main table, ID
+ * 254. This is necessary as the local table may contain
+ * references to data contained in the main table.
+ */
+ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
struct fib_table *tb;
Patches currently in stable-queue which might be from idosch(a)mellanox.com are
queue-4.14/ipv6-honor-specified-parameters-in-fibmatch-lookup.patch
queue-4.14/mlxsw-spectrum-disable-mac-learning-for-ovs-port.patch
queue-4.14/ipv4-fix-use-after-free-when-flushing-fib-tables.patch
This is a note to let you know that I've just added the patch titled
ipv4: fib: Fix metrics match when deleting a route
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ipv4-fib-fix-metrics-match-when-deleting-a-route.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:12:48 CET 2017
From: Phil Sutter <phil(a)nwl.cc>
Date: Tue, 19 Dec 2017 15:17:13 +0100
Subject: ipv4: fib: Fix metrics match when deleting a route
From: Phil Sutter <phil(a)nwl.cc>
[ Upstream commit d03a45572efa068fa64db211d6d45222660e76c5 ]
The recently added fib_metrics_match() causes a regression for routes
with both RTAX_FEATURES and RTAX_CC_ALGO if the latter has
TCP_CONG_NEEDS_ECN flag set:
| # ip link add d0 type dummy
| # ip link set d0 up
| # ip route add 172.29.29.0/24 dev d0 features ecn congctl dctcp
| # ip route del 172.29.29.0/24 dev d0 features ecn congctl dctcp
| RTNETLINK answers: No such process
During route insertion, fib_convert_metrics() detects that the given CC
algo requires ECN and hence sets DST_FEATURE_ECN_CA bit in
RTAX_FEATURES.
During route deletion though, fib_metrics_match() compares stored
RTAX_FEATURES value with that from userspace (which obviously has no
knowledge about DST_FEATURE_ECN_CA) and fails.
Fixes: 5f9ae3d9e7e4a ("ipv4: do metrics match when looking up and deleting a route")
Signed-off-by: Phil Sutter <phil(a)nwl.cc>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/fib_semantics.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -706,7 +706,7 @@ bool fib_metrics_match(struct fib_config
nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
- u32 val;
+ u32 fi_val, val;
if (!type)
continue;
@@ -723,7 +723,11 @@ bool fib_metrics_match(struct fib_config
val = nla_get_u32(nla);
}
- if (fi->fib_metrics->metrics[type - 1] != val)
+ fi_val = fi->fib_metrics->metrics[type - 1];
+ if (type == RTAX_FEATURES)
+ fi_val &= ~DST_FEATURE_ECN_CA;
+
+ if (fi_val != val)
return false;
}
Patches currently in stable-queue which might be from phil(a)nwl.cc are
queue-4.14/ipv4-fib-fix-metrics-match-when-deleting-a-route.patch
This is a note to let you know that I've just added the patch titled
ip6_gre: fix device features for ioctl setup
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ip6_gre-fix-device-features-for-ioctl-setup.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:12:48 CET 2017
From: Alexey Kodanev <alexey.kodanev(a)oracle.com>
Date: Wed, 20 Dec 2017 19:36:03 +0300
Subject: ip6_gre: fix device features for ioctl setup
From: Alexey Kodanev <alexey.kodanev(a)oracle.com>
[ Upstream commit e5a9336adb317db55eb3fe8200856096f3c71109 ]
When ip6gre is created using ioctl, its features, such as
scatter-gather, GSO and tx-checksumming will be turned off:
# ip -f inet6 tunnel add gre6 mode ip6gre remote fd00::1
# ethtool -k gre6 (truncated output)
tx-checksumming: off
scatter-gather: off
tcp-segmentation-offload: off
generic-segmentation-offload: off [requested on]
But when netlink is used, they will be enabled:
# ip link add gre6 type ip6gre remote fd00::1
# ethtool -k gre6 (truncated output)
tx-checksumming: on
scatter-gather: on
tcp-segmentation-offload: on
generic-segmentation-offload: on
This results in a loss of performance when gre6 is created via ioctl.
The issue was found with LTP/gre tests.
Fix it by moving the setup of device features to a separate function
and invoke it with ndo_init callback because both netlink and ioctl
will eventually call it via register_netdevice():
register_netdevice()
- ndo_init() callback -> ip6gre_tunnel_init() or ip6gre_tap_init()
- ip6gre_tunnel_init_common()
- ip6gre_tnl_init_features()
The moved code also contains two minor style fixes:
* removed needless tab from GRE6_FEATURES on NETIF_F_HIGHDMA line.
* fixed the issue reported by checkpatch: "Unnecessary parentheses around
'nt->encap.type == TUNNEL_ENCAP_NONE'"
Fixes: ac4eb009e477 ("ip6gre: Add support for basic offloads offloads excluding GSO")
Signed-off-by: Alexey Kodanev <alexey.kodanev(a)oracle.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv6/ip6_gre.c | 57 +++++++++++++++++++++++++++++------------------------
1 file changed, 32 insertions(+), 25 deletions(-)
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1020,6 +1020,36 @@ static void ip6gre_tunnel_setup(struct n
eth_random_addr(dev->perm_addr);
}
+#define GRE6_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
+static void ip6gre_tnl_init_features(struct net_device *dev)
+{
+ struct ip6_tnl *nt = netdev_priv(dev);
+
+ dev->features |= GRE6_FEATURES;
+ dev->hw_features |= GRE6_FEATURES;
+
+ if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+ nt->encap.type == TUNNEL_ENCAP_NONE) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
+ /* Can use a lockless transmit, unless we generate
+ * output sequences
+ */
+ dev->features |= NETIF_F_LLTX;
+ }
+}
+
static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
@@ -1054,6 +1084,8 @@ static int ip6gre_tunnel_init_common(str
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ ip6gre_tnl_init_features(dev);
+
return 0;
}
@@ -1302,11 +1334,6 @@ static const struct net_device_ops ip6gr
.ndo_get_iflink = ip6_tnl_get_iflink,
};
-#define GRE6_FEATURES (NETIF_F_SG | \
- NETIF_F_FRAGLIST | \
- NETIF_F_HIGHDMA | \
- NETIF_F_HW_CSUM)
-
static void ip6gre_tap_setup(struct net_device *dev)
{
@@ -1386,26 +1413,6 @@ static int ip6gre_newlink(struct net *sr
nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
- dev->features |= GRE6_FEATURES;
- dev->hw_features |= GRE6_FEATURES;
-
- if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
- (nt->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
-
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
-
err = register_netdevice(dev);
if (err)
goto out;
Patches currently in stable-queue which might be from alexey.kodanev(a)oracle.com are
queue-4.14/vxlan-restore-dev-mtu-setting-based-on-lower-device.patch
queue-4.14/ip6_gre-fix-device-features-for-ioctl-setup.patch
This is a note to let you know that I've just added the patch titled
bnxt_en: Fix sources of spurious netpoll warnings
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
bnxt_en-fix-sources-of-spurious-netpoll-warnings.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:12:48 CET 2017
From: Calvin Owens <calvinowens(a)fb.com>
Date: Fri, 8 Dec 2017 09:05:26 -0800
Subject: bnxt_en: Fix sources of spurious netpoll warnings
From: Calvin Owens <calvinowens(a)fb.com>
[ Upstream commit 2edbdb3159d6f6bd3a9b6e7f789f2b879699a519 ]
After applying 2270bc5da3497945 ("bnxt_en: Fix netpoll handling") and
903649e718f80da2 ("bnxt_en: Improve -ENOMEM logic in NAPI poll loop."),
we still see the following WARN fire:
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1875170 at net/core/netpoll.c:165 netpoll_poll_dev+0x15a/0x160
bnxt_poll+0x0/0xd0 exceeded budget in poll
<snip>
Call Trace:
[<ffffffff814be5cd>] dump_stack+0x4d/0x70
[<ffffffff8107e013>] __warn+0xd3/0xf0
[<ffffffff8107e07f>] warn_slowpath_fmt+0x4f/0x60
[<ffffffff8179519a>] netpoll_poll_dev+0x15a/0x160
[<ffffffff81795f38>] netpoll_send_skb_on_dev+0x168/0x250
[<ffffffff817962fc>] netpoll_send_udp+0x2dc/0x440
[<ffffffff815fa9be>] write_ext_msg+0x20e/0x250
[<ffffffff810c8125>] call_console_drivers.constprop.23+0xa5/0x110
[<ffffffff810c9549>] console_unlock+0x339/0x5b0
[<ffffffff810c9a88>] vprintk_emit+0x2c8/0x450
[<ffffffff810c9d5f>] vprintk_default+0x1f/0x30
[<ffffffff81173df5>] printk+0x48/0x50
[<ffffffffa0197713>] edac_raw_mc_handle_error+0x563/0x5c0 [edac_core]
[<ffffffffa0197b9b>] edac_mc_handle_error+0x42b/0x6e0 [edac_core]
[<ffffffffa01c3a60>] sbridge_mce_output_error+0x410/0x10d0 [sb_edac]
[<ffffffffa01c47cc>] sbridge_check_error+0xac/0x130 [sb_edac]
[<ffffffffa0197f3c>] edac_mc_workq_function+0x3c/0x90 [edac_core]
[<ffffffff81095f8b>] process_one_work+0x19b/0x480
[<ffffffff810967ca>] worker_thread+0x6a/0x520
[<ffffffff8109c7c4>] kthread+0xe4/0x100
[<ffffffff81884c52>] ret_from_fork+0x22/0x40
This happens because we increment rx_pkts on -ENOMEM and -EIO, resulting
in rx_pkts > 0. Fix this by only bumping rx_pkts if we were actually
given a non-zero budget.
Signed-off-by: Calvin Owens <calvinowens(a)fb.com>
Acked-by: Michael Chan <michael.chan(a)broadcom.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1875,7 +1875,7 @@ static int bnxt_poll_work(struct bnxt *b
* here forever if we consistently cannot allocate
* buffers.
*/
- else if (rc == -ENOMEM)
+ else if (rc == -ENOMEM && budget)
rx_pkts++;
else if (rc == -EBUSY) /* partial completion */
break;
@@ -1961,7 +1961,7 @@ static int bnxt_poll_nitroa0(struct napi
cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
- if (likely(rc == -EIO))
+ if (likely(rc == -EIO) && budget)
rx_pkts++;
else if (rc == -EBUSY) /* partial completion */
break;
Patches currently in stable-queue which might be from calvinowens(a)fb.com are
queue-4.14/bnxt_en-fix-sources-of-spurious-netpoll-warnings.patch
This is a note to let you know that I've just added the patch titled
adding missing rcu_read_unlock in ipxip6_rcv
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
adding-missing-rcu_read_unlock-in-ipxip6_rcv.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:12:48 CET 2017
From: "Nikita V. Shirokov" <tehnerd(a)fb.com>
Date: Wed, 6 Dec 2017 17:15:43 -0800
Subject: adding missing rcu_read_unlock in ipxip6_rcv
From: "Nikita V. Shirokov" <tehnerd(a)fb.com>
[ Upstream commit 74c4b656c3d92ec4c824ea1a4afd726b7b6568c8 ]
commit 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels")
introduced new exit point in ipxip6_rcv. however rcu_read_unlock is
missing there. this diff is fixing this
v1->v2:
instead of doing rcu_read_unlock in place, we are going to "drop"
section (to prevent skb leakage)
Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels")
Signed-off-by: Nikita V. Shirokov <tehnerd(a)fb.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv6/ip6_tunnel.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -912,7 +912,7 @@ static int ipxip6_rcv(struct sk_buff *sk
if (t->parms.collect_md) {
tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
if (!tun_dst)
- return 0;
+ goto drop;
}
ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
log_ecn_error);
Patches currently in stable-queue which might be from tehnerd(a)fb.com are
queue-4.14/adding-missing-rcu_read_unlock-in-ipxip6_rcv.patch
This is a note to let you know that I've just added the patch titled
x86/mm: Enable CR4.PCIDE on supported systems
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-enable-cr4.pcide-on-supported-systems.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 660da7c9228f685b2ebe664f9fd69aaddcc420b5 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Thu, 29 Jun 2017 08:53:21 -0700
Subject: x86/mm: Enable CR4.PCIDE on supported systems
From: Andy Lutomirski <luto(a)kernel.org>
commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream.
We can use PCID if the CPU has PCID and PGE and we're not on Xen.
By itself, this has no effect. A followup patch will start using PCID.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Reviewed-by: Nadav Amit <nadav.amit(a)gmail.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Arjan van de Ven <arjan(a)linux.intel.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: linux-mm(a)kvack.org
Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.149875120…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/tlbflush.h | 8 ++++++++
arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++
arch/x86/xen/enlighten.c | 6 ++++++
3 files changed, 36 insertions(+)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -191,6 +191,14 @@ static inline void __flush_tlb_all(void)
__flush_tlb_global();
else
__flush_tlb();
+
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
+ * we might fail to flush kernel translations for other cached ASIDs.
+ *
+ * To avoid this issue, we force PCID off if PGE is off.
+ */
}
static inline void __flush_tlb_one(unsigned long addr)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -324,6 +324,25 @@ static __always_inline void setup_smap(s
}
}
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -1082,6 +1101,9 @@ static void identify_cpu(struct cpuinfo_
setup_smep(c);
setup_smap(c);
+ /* Set up PCID */
+ setup_pcid(c);
+
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -444,6 +444,12 @@ static void __init xen_init_cpuid_mask(v
~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
+ /*
+ * Xen PV would need some work to support PCID: CR3 handling as well
+ * as xen_flush_tlb_others() would need updating.
+ */
+ cpuid_leaf1_ecx_mask &= ~(1 << X86_FEATURE_PCID); /* disable PCID */
+
if (!xen_initial_domain())
cpuid_leaf1_edx_mask &=
~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.9/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
queue-4.9/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
queue-4.9/x86-mm-enable-cr4.pcide-on-supported-systems.patch
queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
queue-4.9/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
queue-4.9/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
queue-4.9/x86-mm-disable-pcid-on-32-bit-kernels.patch
queue-4.9/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
This is a note to let you know that I've just added the patch titled
x86/mm/64: Fix reboot interaction with CR4.PCIDE
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 924c6b900cfdf376b07bccfd80e62b21914f8a5a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Sun, 8 Oct 2017 21:53:05 -0700
Subject: x86/mm/64: Fix reboot interaction with CR4.PCIDE
From: Andy Lutomirski <luto(a)kernel.org>
commit 924c6b900cfdf376b07bccfd80e62b21914f8a5a upstream.
Trying to reboot via real mode fails with PCID on: long mode cannot
be exited while CR4.PCIDE is set. (No, I have no idea why, but the
SDM and actual CPUs are in agreement here.) The result is a GPF and
a hang instead of a reboot.
I didn't catch this in testing because neither my computer nor my VM
reboots this way. I can trigger it with reboot=bios, though.
Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
Reported-and-tested-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)alien8.de>
Link: https://lkml.kernel.org/r/f1e7d965998018450a7a70c2823873686a8b21c0.15075247…
Cc: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/reboot.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -106,6 +106,10 @@ void __noreturn machine_real_restart(uns
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
+
+ /* Exiting long mode will fail if CR4.PCIDE is set. */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.9/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
queue-4.9/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
queue-4.9/x86-mm-enable-cr4.pcide-on-supported-systems.patch
queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
queue-4.9/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
queue-4.9/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
queue-4.9/x86-mm-disable-pcid-on-32-bit-kernels.patch
queue-4.9/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
This is a note to let you know that I've just added the patch titled
x86/mm: Add the 'nopcid' boot option to turn off PCID
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 0790c9aad84901ca1bdc14746175549c8b5da215 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Thu, 29 Jun 2017 08:53:20 -0700
Subject: x86/mm: Add the 'nopcid' boot option to turn off PCID
From: Andy Lutomirski <luto(a)kernel.org>
commit 0790c9aad84901ca1bdc14746175549c8b5da215 upstream.
The parameter is only present on x86_64 systems to save a few bytes,
as PCID is always disabled on x86_32.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Reviewed-by: Nadav Amit <nadav.amit(a)gmail.com>
Reviewed-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Arjan van de Ven <arjan(a)linux.intel.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: linux-mm(a)kvack.org
Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.149875120…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
Documentation/kernel-parameters.txt | 2 ++
arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++
2 files changed, 20 insertions(+)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2795,6 +2795,8 @@ bytes respectively. Such letter suffixes
nopat [X86] Disable PAT (page attribute table extension of
pagetables) support.
+ nopcid [X86-64] Disable the PCID cpu feature.
+
norandmaps Don't use address space randomization. Equivalent to
echo 0 > /proc/sys/kernel/randomize_va_space
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -163,6 +163,24 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);
+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+ /* require an exact match without trailing characters */
+ if (strlen(s))
+ return 0;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return 1;
+
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ pr_info("nopcid: PCID feature disabled\n");
+ return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
static int __init x86_noinvpcid_setup(char *s)
{
/* noinvpcid doesn't accept parameters */
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.9/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
queue-4.9/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
queue-4.9/x86-mm-enable-cr4.pcide-on-supported-systems.patch
queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
queue-4.9/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
queue-4.9/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
queue-4.9/x86-mm-disable-pcid-on-32-bit-kernels.patch
queue-4.9/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
This is a note to let you know that I've just added the patch titled
x86/mm: Enable CR4.PCIDE on supported systems
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-enable-cr4.pcide-on-supported-systems.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 660da7c9228f685b2ebe664f9fd69aaddcc420b5 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Thu, 29 Jun 2017 08:53:21 -0700
Subject: x86/mm: Enable CR4.PCIDE on supported systems
From: Andy Lutomirski <luto(a)kernel.org>
commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream.
We can use PCID if the CPU has PCID and PGE and we're not on Xen.
By itself, this has no effect. A followup patch will start using PCID.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Reviewed-by: Nadav Amit <nadav.amit(a)gmail.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Arjan van de Ven <arjan(a)linux.intel.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: linux-mm(a)kvack.org
Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.149875120…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/tlbflush.h | 8 ++++++++
arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++
arch/x86/xen/enlighten.c | 6 ++++++
3 files changed, 36 insertions(+)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -190,6 +190,14 @@ static inline void __flush_tlb_all(void)
__flush_tlb_global();
else
__flush_tlb();
+
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
+ * we might fail to flush kernel translations for other cached ASIDs.
+ *
+ * To avoid this issue, we force PCID off if PGE is off.
+ */
}
static inline void __flush_tlb_one(unsigned long addr)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -321,6 +321,25 @@ static __always_inline void setup_smap(s
}
}
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
+}
+
/*
* Some CPU features depend on higher CPUID levels, which may not always
* be available due to CPUID level capping or broken virtualization
@@ -952,6 +971,9 @@ static void identify_cpu(struct cpuinfo_
setup_smep(c);
setup_smap(c);
+ /* Set up PCID */
+ setup_pcid(c);
+
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -433,6 +433,12 @@ static void __init xen_init_cpuid_mask(v
~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
+ /*
+ * Xen PV would need some work to support PCID: CR3 handling as well
+ * as xen_flush_tlb_others() would need updating.
+ */
+ cpuid_leaf1_ecx_mask &= ~(1 << X86_FEATURE_PCID); /* disable PCID */
+
if (!xen_initial_domain())
cpuid_leaf1_edx_mask &=
~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.4/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
queue-4.4/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
queue-4.4/x86-mm-enable-cr4.pcide-on-supported-systems.patch
queue-4.4/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
queue-4.4/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
queue-4.4/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
queue-4.4/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
queue-4.4/x86-mm-disable-pcid-on-32-bit-kernels.patch
queue-4.4/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch