Hi Greg, Ben, and all
Is https://www.kernel.org/category/releases.html updated in terms of EOL?
Some news out of the Linaro conference [2] has raised a lot of doubts and
questions.
Especially because, the way the news stated it, 3.16 would no longer be
active. I'm not sure how accurate the news is, so I'd like confirmation from
you about the expected EOL.
[2] https://itsfoss.com/linux-lts-kernel-six-years/
Thanks in advance,
Rodrigo.
Inside of start_xmit() the call to check if the connection is up and the
queueing of the packets for later transmission are not atomic, which
leaves a window where cm_rep_handler can run, set the connection up,
dequeue pending packets and leave the subsequently queued packets by
start_xmit() sitting on neigh->queue until they're dropped when the
connection is torn down. This only applies to connected mode. These
dropped packets can really upset TCP, for example, and cause
multi-minute delays in transmission for open connections.
Here's the code in start_xmit where we check to see if the connection
is up:
if (ipoib_cm_get(neigh)) {
if (ipoib_cm_up(neigh)) {
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
}
The race occurs if cm_rep_handler execution occurs after the above
connection check (specifically if it gets to the point where it acquires
priv->lock to dequeue pending skb's) but before the below code snippet
in start_xmit where packets are queued.
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
push_pseudo_header(skb, phdr->hwaddr);
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
The patch re-checks ipoib_cm_up with priv->lock held to avoid this
race condition. Since odds are the conn should be up most of the time
(and thus the connection *not* down most of the time) we don't hold the
lock for the first check attempt to avoid a slowdown from unnecessary
locking for the majority of the packets transmitted during the
connection's life.
Signed-off-by: Aaron Knister <aaron.s.knister(a)nasa.gov>
---
drivers/infiniband/ulp/ipoib/ipoib_main.c | 46 ++++++++++++++++++++++++++-----
1 file changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95b..a950c916 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1093,6 +1093,21 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
spin_unlock_irqrestore(&priv->lock, flags);
}
+static bool defer_neigh_skb(struct sk_buff *skb,
+ struct net_device *dev,
+ struct ipoib_neigh *neigh,
+ struct ipoib_pseudo_header *phdr)
+{
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, phdr->hwaddr);
+ __skb_queue_tail(&neigh->queue, skb);
+ return true;
+ }
+
+ return false;
+}
+
+
static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -1101,6 +1116,7 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
unsigned long flags;
+ bool deferred_pkt = true;
phdr = (struct ipoib_pseudo_header *) skb->data;
skb_pull(skb, sizeof(*phdr));
@@ -1160,6 +1176,23 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
+ /*
+ * Re-check ipoib_cm_up with priv->lock held to avoid
+ * race condition between start_xmit and skb_dequeue in
+ * cm_rep_handler. Since odds are the conn should be up
+ * most of the time, we don't hold the lock for the
+ * first check above
+ */
+ spin_lock_irqsave(&priv->lock, flags);
+ if (ipoib_cm_up(neigh)) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+ } else {
+ deferred_pkt = defer_neigh_skb(skb, dev, neigh, phdr);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+
+ goto unref;
} else if (neigh->ah && neigh->ah->valid) {
neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
IPOIB_QPN(phdr->hwaddr));
@@ -1168,17 +1201,16 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
neigh_refresh_path(neigh, phdr->hwaddr, dev);
}
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- push_pseudo_header(skb, phdr->hwaddr);
- spin_lock_irqsave(&priv->lock, flags);
- __skb_queue_tail(&neigh->queue, skb);
- spin_unlock_irqrestore(&priv->lock, flags);
- } else {
+ spin_lock_irqsave(&priv->lock, flags);
+ deferred_pkt = defer_neigh_skb(skb, dev, neigh, phdr);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+unref:
+ if (!deferred_pkt) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
-unref:
ipoib_neigh_put(neigh);
return NETDEV_TX_OK;
--
2.12.3
Use the new of_get_compatible_child() helper to lookup the slot child
node instead of using of_find_compatible_node(), which searches the
entire tree and thus can return an unrelated (i.e. non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the node of the device being probed).
While at it, also fix up the related slot-node reference leak.
Fixes: ed80a13bb4c4 ("mmc: meson-mx-sdio: Add a driver for the Amlogic Meson8 and Meson8b SoCs")
Cc: stable <stable(a)vger.kernel.org> # 4.15
Cc: Carlo Caione <carlo(a)endlessm.com>
Cc: Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
Cc: Ulf Hansson <ulf.hansson(a)linaro.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/mmc/host/meson-mx-sdio.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c
index 09cb89645d06..2cfec33178c1 100644
--- a/drivers/mmc/host/meson-mx-sdio.c
+++ b/drivers/mmc/host/meson-mx-sdio.c
@@ -517,19 +517,23 @@ static struct mmc_host_ops meson_mx_mmc_ops = {
static struct platform_device *meson_mx_mmc_slot_pdev(struct device *parent)
{
struct device_node *slot_node;
+ struct platform_device *pdev;
/*
* TODO: the MMC core framework currently does not support
* controllers with multiple slots properly. So we only register
* the first slot for now
*/
- slot_node = of_find_compatible_node(parent->of_node, NULL, "mmc-slot");
+ slot_node = of_get_compatible_child(parent->of_node, "mmc-slot");
if (!slot_node) {
dev_warn(parent, "no 'mmc-slot' sub-node found\n");
return ERR_PTR(-ENOENT);
}
- return of_platform_device_create(slot_node, NULL, parent);
+ pdev = of_platform_device_create(slot_node, NULL, parent);
+ of_node_put(slot_node);
+
+ return pdev;
}
static int meson_mx_mmc_add_host(struct meson_mx_mmc_host *host)
--
2.18.0