Inside of start_xmit() the call to check if the connection is up and the
queueing of the packets for later transmission is not atomic which
leaves a window where cm_rep_handler can run, set the connection up,
dequeue pending packets and leave the subsequently queued packets by
start_xmit() sitting on neigh->queue until they're dropped when the
connection is torn down. This only applies to connected mode. These
dropped packets can really upset TCP, for example, and cause
multi-minute delays in transmission for open connections.
Here's the code in start_xmit where we check to see if the connection
is up:
if (ipoib_cm_get(neigh)) {
if (ipoib_cm_up(neigh)) {
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
}
The race occurs if cm_rep_handler execution occurs after the above
connection check (specifically if it gets to the point where it acquires
priv->lock to dequeue pending skb's) but before the below code snippet
in start_xmit where packets are queued.
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
push_pseudo_header(skb, phdr->hwaddr);
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
The patch re-checks ipoib_cm_up with priv->lock held to avoid this
race condition. Since odds are the conn should be up most of the time
(and thus the connection *not* down most of the time) we don't hold the
lock for the first check attempt to avoid a slowdown from unecessary
locking for the majority of the packets transmitted during the
connection's life.
Signed-off-by: Aaron Knister <aaron.s.knister(a)nasa.gov>
---
drivers/infiniband/ulp/ipoib/ipoib_main.c | 46 ++++++++++++++++++++++++++-----
1 file changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95b..a950c916 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1093,6 +1093,21 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
spin_unlock_irqrestore(&priv->lock, flags);
}
+static bool defer_neigh_skb(struct sk_buff *skb,
+ struct net_device *dev,
+ struct ipoib_neigh *neigh,
+ struct ipoib_pseudo_header *phdr)
+{
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, phdr->hwaddr);
+ __skb_queue_tail(&neigh->queue, skb);
+ return true;
+ }
+
+ return false;
+}
+
+
static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -1101,6 +1116,7 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
unsigned long flags;
+ bool deferred_pkt = true;
phdr = (struct ipoib_pseudo_header *) skb->data;
skb_pull(skb, sizeof(*phdr));
@@ -1160,6 +1176,23 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
+ /*
+ * Re-check ipoib_cm_up with priv->lock held to avoid
+ * race condition between start_xmit and skb_dequeue in
+ * cm_rep_handler. Since odds are the conn should be up
+ * most of the time, we don't hold the lock for the
+ * first check above
+ */
+ spin_lock_irqsave(&priv->lock, flags);
+ if (ipoib_cm_up(neigh)) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+ } else {
+ deferred_pkt = defer_neigh_skb(skb, dev, neigh, phdr);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+
+ goto unref;
} else if (neigh->ah && neigh->ah->valid) {
neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
IPOIB_QPN(phdr->hwaddr));
@@ -1168,17 +1201,16 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
neigh_refresh_path(neigh, phdr->hwaddr, dev);
}
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- push_pseudo_header(skb, phdr->hwaddr);
- spin_lock_irqsave(&priv->lock, flags);
- __skb_queue_tail(&neigh->queue, skb);
- spin_unlock_irqrestore(&priv->lock, flags);
- } else {
+ spin_lock_irqsave(&priv->lock, flags);
+ deferred_pkt = defer_neigh_skb(skb, dev, neigh, phdr);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+unref:
+ if (!deferred_pkt) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
-unref:
ipoib_neigh_put(neigh);
return NETDEV_TX_OK;
--
2.12.3
Use the new of_get_compatible_child() helper to lookup the slot child
node instead of using of_find_compatible_node(), which searches the
entire tree and thus can return an unrelated (i.e. non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the node of the device being probed).
While at it, also fix up the related slot-node reference leak.
Fixes: ed80a13bb4c4 ("mmc: meson-mx-sdio: Add a driver for the Amlogic Meson8 and Meson8b SoCs")
Cc: stable <stable(a)vger.kernel.org> # 4.15
Cc: Carlo Caione <carlo(a)endlessm.com>
Cc: Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
Cc: Ulf Hansson <ulf.hansson(a)linaro.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/mmc/host/meson-mx-sdio.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c
index 09cb89645d06..2cfec33178c1 100644
--- a/drivers/mmc/host/meson-mx-sdio.c
+++ b/drivers/mmc/host/meson-mx-sdio.c
@@ -517,19 +517,23 @@ static struct mmc_host_ops meson_mx_mmc_ops = {
static struct platform_device *meson_mx_mmc_slot_pdev(struct device *parent)
{
struct device_node *slot_node;
+ struct platform_device *pdev;
/*
* TODO: the MMC core framework currently does not support
* controllers with multiple slots properly. So we only register
* the first slot for now
*/
- slot_node = of_find_compatible_node(parent->of_node, NULL, "mmc-slot");
+ slot_node = of_get_compatible_child(parent->of_node, "mmc-slot");
if (!slot_node) {
dev_warn(parent, "no 'mmc-slot' sub-node found\n");
return ERR_PTR(-ENOENT);
}
- return of_platform_device_create(slot_node, NULL, parent);
+ pdev = of_platform_device_create(slot_node, NULL, parent);
+ of_node_put(slot_node);
+
+ return pdev;
}
static int meson_mx_mmc_add_host(struct meson_mx_mmc_host *host)
--
2.18.0
This is the start of the stable review cycle for the 4.18.5 release.
There are 22 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat Aug 25 07:47:43 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.18.5-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.18.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.18.5-rc1
Jann Horn <jannh(a)google.com>
reiserfs: fix broken xattr handling (heap corruption, bad retval)
Esben Haabendal <eha(a)deif.com>
i2c: imx: Fix race condition in dma read
Hans de Goede <hdegoede(a)redhat.com>
i2c: core: ACPI: Properly set status byte to 0 for multi-byte writes
Lukas Wunner <lukas(a)wunner.de>
PCI: pciehp: Fix unprotected list iteration in IRQ handler
Lukas Wunner <lukas(a)wunner.de>
PCI: pciehp: Fix use-after-free on unplug
Myron Stowe <myron.stowe(a)redhat.com>
PCI: Skip MPS logic for Virtual Functions (VFs)
Zachary Zhang <zhangzg(a)marvell.com>
PCI: aardvark: Size bridges before resources allocation
Lukas Wunner <lukas(a)wunner.de>
PCI: hotplug: Don't leak pci_slot on registration failure
Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
PCI / ACPI / PM: Resume all bridges on suspend-to-RAM
Christian König <ckoenig.leichtzumerken(a)gmail.com>
PCI: Restore resized BAR state on resume
John David Anglin <dave.anglin(a)bell.net>
parisc: Remove ordered stores from syscall.S
John David Anglin <dave.anglin(a)bell.net>
parisc: Remove unnecessary barriers from spinlock.h
Gustavo A. R. Silva <gustavo(a)embeddedor.com>
drm/amdgpu/pm: Fix potential Spectre v1
Gustavo A. R. Silva <gustavo(a)embeddedor.com>
drm/i915/kvmgt: Fix potential Spectre v1
Jeremy Cline <jcline(a)redhat.com>
ext4: fix spectre gadget in ext4_mb_regular_allocator()
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc64s: Show ori31 availability in spectre_v1 sysfs file not v2
Dave Hansen <dave.hansen(a)linux.intel.com>
x86/mm/init: Remove freed kernel image areas from alias mapping
Dave Hansen <dave.hansen(a)linux.intel.com>
x86/mm/init: Add helper for freeing kernel image pages
Dave Hansen <dave.hansen(a)linux.intel.com>
x86/mm/init: Pass unconverted symbol addresses to free_init_pages()
Dave Hansen <dave.hansen(a)linux.intel.com>
mm: Allow non-direct-map arguments to free_reserved_area()
Matthijs van Duin <matthijsvanduin(a)gmail.com>
pty: fix O_CLOEXEC for TIOCGPTPEER
Takashi Iwai <tiwai(a)suse.de>
EDAC: Add missing MEM_LRDDR4 entry in edac_mem_types[]
-------------
Diffstat:
Makefile | 4 ++--
arch/parisc/include/asm/spinlock.h | 8 ++------
arch/parisc/kernel/syscall.S | 24 +++++++++++-----------
arch/powerpc/kernel/security.c | 27 ++++++++++++++++---------
arch/x86/include/asm/processor.h | 1 +
arch/x86/include/asm/set_memory.h | 1 +
arch/x86/mm/init.c | 37 +++++++++++++++++++++++++++++++---
arch/x86/mm/init_64.c | 8 ++------
arch/x86/mm/pageattr.c | 13 ++++++++++++
drivers/edac/edac_mc.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 3 ++-
drivers/gpu/drm/i915/gvt/kvmgt.c | 9 ++++++++-
drivers/i2c/busses/i2c-imx.c | 8 ++++----
drivers/i2c/i2c-core-acpi.c | 11 +++++++---
drivers/pci/controller/pci-aardvark.c | 1 +
drivers/pci/hotplug/pci_hotplug_core.c | 9 +++++++++
drivers/pci/hotplug/pciehp.h | 1 +
drivers/pci/hotplug/pciehp_core.c | 7 +++++++
drivers/pci/hotplug/pciehp_hpc.c | 18 +++++------------
drivers/pci/pci-acpi.c | 6 ++----
drivers/pci/pci.c | 28 +++++++++++++++++++++++++
drivers/pci/probe.c | 4 ++++
drivers/tty/pty.c | 2 +-
fs/ext4/mballoc.c | 4 +++-
fs/reiserfs/xattr.c | 4 +++-
mm/page_alloc.c | 16 +++++++++++++--
26 files changed, 185 insertions(+), 70 deletions(-)