From: Madalin Bucur madalin.bucur@nxp.com
[ Upstream commit 89857a8a5c89a406b967ab2be7bd2ccdbe75e73d ]
By clearing all interrupt sources, not only those that already occurred, the existing code may acknowledge by mistake interrupts that occurred after the code checks for them.
Signed-off-by: Madalin Bucur madalin.bucur@nxp.com Signed-off-by: Roy Pledge roy.pledge@nxp.com Signed-off-by: Li Yang leoyang.li@nxp.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/soc/fsl/qbman/qman.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 2cc82ed6433a0..91f5c951850f7 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1073,18 +1073,19 @@ static void qm_mr_process_task(struct work_struct *work); static irqreturn_t portal_isr(int irq, void *ptr) { struct qman_portal *p = ptr; - - u32 clear = QM_DQAVAIL_MASK | p->irq_sources; u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources; + u32 clear = 0;
if (unlikely(!is)) return IRQ_NONE;
/* DQRR-handling if it's interrupt-driven */ - if (is & QM_PIRQ_DQRI) + if (is & QM_PIRQ_DQRI) { __poll_portal_fast(p, QMAN_POLL_LIMIT); + clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI; + } /* Handling of anything else that's interrupt-driven */ - clear |= __poll_portal_slow(p, is); + clear |= __poll_portal_slow(p, is) & QM_PIRQ_SLOW; qm_out(&p->p, QM_REG_ISR, clear); return IRQ_HANDLED; }
From: Peng Hao peng.hao2@zte.com.cn
[ Upstream commit ba16adeb346387eb2d1ada69003588be96f098fa ]
devm_ allocated data will be automatically freed. The free of devm_ allocated data is invalid.
Fixes: 1c459de1e645 ("ARM: pxa: ssp: use devm_ functions") Signed-off-by: Peng Hao peng.hao2@zte.com.cn [title's prefix changed] Signed-off-by: Robert Jarzmik robert.jarzmik@free.fr Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/plat-pxa/ssp.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ba13f793fbce4..b92673efffffb 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -237,8 +237,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) if (ssp == NULL) return -ENODEV;
- iounmap(ssp->mmio_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res));
@@ -248,7 +246,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) list_del(&ssp->node); mutex_unlock(&ssp_lock);
- kfree(ssp); return 0; }
From: Srinivas Kandagatla srinivas.kandagatla@linaro.org
[ Upstream commit 2a81efb0de0e33f2d2c83154af0bd3ce389b3269 ]
Add compatible to gicv3 node to enable quirk required to restrict writing to GICR_WAKER register which is restricted on msm8996 SoC in Hypervisor.
With this quirk MSM8996 can at least boot out of mainline, which can help community to work with boards based on MSM8996.
Without this patch Qualcomm DB820c board reboots on mainline.
Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Signed-off-by: Andy Gross andy.gross@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 2c93de7fffe50..bdea2d6fde94a 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -219,7 +219,7 @@ compatible = "simple-bus";
intc: interrupt-controller@9bc0000 { - compatible = "arm,gic-v3"; + compatible = "qcom,msm8996-gic-v3", "arm,gic-v3"; #interrupt-cells = <3>; interrupt-controller; #redistributor-regions = <1>;
From: Anders Roxell anders.roxell@linaro.org
[ Upstream commit f2105d42597f4d10e431b195d69e96dccaf9b012 ]
Fix link errors when CONFIG_FSL_USB2_OTG is enabled and USB_OTG_FSM is set to module then the following link error occurs.
aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_ioctl': drivers/usb/phy/phy-fsl-usb.c:1083: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:1083:(.text+0x574): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_srp': drivers/usb/phy/phy-fsl-usb.c:674: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:674:(.text+0x61c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_host': drivers/usb/phy/phy-fsl-usb.c:593: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:593:(.text+0x7a4): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_hnp': drivers/usb/phy/phy-fsl-usb.c:695: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:695:(.text+0x858): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `a_wait_enum': drivers/usb/phy/phy-fsl-usb.c:274: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:274:(.text+0x16f0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o:drivers/usb/phy/phy-fsl-usb.c:619: more undefined references to `otg_statemachine' follow aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_peripheral': drivers/usb/phy/phy-fsl-usb.c:619:(.text+0x1fa0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' make[1]: *** [Makefile:1020: vmlinux] Error 1 make[1]: Target 'Image' not remade because of errors. make: *** [Makefile:152: sub-make] Error 2 make: Target 'Image' not remade because of errors.
Rework so that FSL_USB2_OTG depends on that the USB_OTG_FSM is builtin.
Signed-off-by: Anders Roxell anders.roxell@linaro.org Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 125cea1c3c8df..19ce615455c1b 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -20,7 +20,7 @@ config AB8500_USB
config FSL_USB2_OTG bool "Freescale USB OTG Transceiver Driver" - depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM + depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY help
From: Lubomir Rintel lkundrak@v3.sk
[ Upstream commit 2380a22b60ce6f995eac806e69c66e397b59d045 ]
Resetting bit 4 disables the interrupt delivery to the "secure processor" core. This breaks the keyboard on a OLPC XO 1.75 laptop, where the firmware running on the "secure processor" bit-bangs the PS/2 protocol over the GPIO lines.
It is not clear what the rest of the bits are and Marvell was unhelpful when asked for documentation. Aside from the SP bit, there are probably priority bits.
Leaving the unknown bits as the firmware set them up seems to be a wiser course of action compared to just turning them off.
Signed-off-by: Lubomir Rintel lkundrak@v3.sk Acked-by: Pavel Machek pavel@ucw.cz [maz: fixed-up subject and commit message] Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-mmp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 013fc9659a842..2fe2bcb63a711 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -34,6 +34,9 @@ #define SEL_INT_PENDING (1 << 6) #define SEL_INT_NUM_MASK 0x3f
+#define MMP2_ICU_INT_ROUTE_PJ4_IRQ (1 << 5) +#define MMP2_ICU_INT_ROUTE_PJ4_FIQ (1 << 6) + struct icu_chip_data { int nr_irqs; unsigned int virq_base; @@ -190,7 +193,8 @@ static struct mmp_intc_conf mmp_conf = { static struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, - .conf_mask = 0x7f, + .conf_mask = MMP2_ICU_INT_ROUTE_PJ4_IRQ | + MMP2_ICU_INT_ROUTE_PJ4_FIQ, };
static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs)
From: Paul Kocialkowski paul.kocialkowski@bootlin.com
[ Upstream commit b14e945bda8ae227d1bf2b1837c0c4a61721cd1a ]
When initializing clocks, a reference to the TCON channel 0 clock is obtained. However, the clock is never prepared and enabled later. Switching from simplefb to DRM actually disables the clock (that was usually configured by U-Boot) because of that.
On the V3s, this results in a hang when writing to some mixer registers when switching over to DRM from simplefb.
Fix this by preparing and enabling the clock when initializing other clocks. Waiting for sun4i_tcon_channel_enable to enable the clock is apparently too late and results in the same mixer register access hang.
Signed-off-by: Paul Kocialkowski paul.kocialkowski@bootlin.com Signed-off-by: Maxime Ripard maxime.ripard@bootlin.com Link: https://patchwork.freedesktop.org/patch/msgid/20190131132550.26355-1-paul.ko... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index f2975a1525bee..2796fea70a427 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -327,6 +327,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, dev_err(dev, "Couldn't get the TCON channel 0 clock\n"); return PTR_ERR(tcon->sclk0); } + clk_prepare_enable(tcon->sclk0);
if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -341,6 +342,7 @@ static int sun4i_tcon_init_clocks(struct device *dev,
static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) { + clk_disable_unprepare(tcon->sclk0); clk_disable_unprepare(tcon->clk); }
From: Codrin Ciubotariu codrin.ciubotariu@microchip.com
[ Upstream commit dc3f595b6617ebc0307e0ce151e8f2f2b2489b95 ]
atchan->status variable is used to store two different information: - pass channel interrupts status from interrupt handler to tasklet; - channel information like whether it is cyclic or paused;
This causes a bug when device_terminate_all() is called, (AT_XDMAC_CHAN_IS_CYCLIC cleared on atchan->status) and then a late End of Block interrupt arrives (AT_XDMAC_CIS_BIS), which sets bit 0 of atchan->status. Bit 0 is also used for AT_XDMAC_CHAN_IS_CYCLIC, so when a new descriptor for a cyclic transfer is created, the driver reports the channel as in use:
if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) { dev_err(chan2dev(chan), "channel currently used\n"); return NULL; }
This patch fixes the bug by adding a different struct member to keep the interrupts status separated from the channel status bits.
Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Codrin Ciubotariu codrin.ciubotariu@microchip.com Acked-by: Ludovic Desroches ludovic.desroches@microchip.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/dma/at_xdmac.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index ee7b48d5243cf..b222dd7afe8e2 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -203,6 +203,7 @@ struct at_xdmac_chan { u32 save_cim; u32 save_cnda; u32 save_cndc; + u32 irq_status; unsigned long status; struct tasklet_struct tasklet; struct dma_slave_config sconfig; @@ -1582,8 +1583,8 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc *desc; u32 error_mask;
- dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n", - __func__, atchan->status); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status);
error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS @@ -1591,15 +1592,15 @@ static void at_xdmac_tasklet(unsigned long data)
if (at_xdmac_chan_is_cyclic(atchan)) { at_xdmac_handle_cyclic(atchan); - } else if ((atchan->status & AT_XDMAC_CIS_LIS) - || (atchan->status & error_mask)) { + } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) + || (atchan->irq_status & error_mask)) { struct dma_async_tx_descriptor *txd;
- if (atchan->status & AT_XDMAC_CIS_RBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) dev_err(chan2dev(&atchan->chan), "read bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_WBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) dev_err(chan2dev(&atchan->chan), "write bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_ROIS) + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!");
spin_lock_bh(&atchan->lock); @@ -1654,7 +1655,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) atchan = &atxdmac->chan[i]; chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); - atchan->status = chan_status & chan_imr; + atchan->irq_status = chan_status & chan_imr; dev_vdbg(atxdmac->dma.dev, "%s: chan%d: imr=0x%x, status=0x%x\n", __func__, i, chan_imr, chan_status); @@ -1668,7 +1669,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) at_xdmac_chan_read(atchan, AT_XDMAC_CDA), at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
- if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
tasklet_schedule(&atchan->tasklet);
From: Stefano Garzarella sgarzare@redhat.com
[ Upstream commit 22b5c0b63f32568e130fa2df4ba23efce3eb495b ]
virtio_vsock_remove() invokes the vsock_core_exit() also if there are opened sockets for the AF_VSOCK protocol family. In this way the vsock "transport" pointer is set to NULL, triggering the kernel panic at the first socket activity.
This patch move the vsock_core_init()/vsock_core_exit() in the virtio_vsock respectively in module_init and module_exit functions, that cannot be invoked until there are open sockets.
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1609699 Reported-by: Yan Fu yafu@redhat.com Signed-off-by: Stefano Garzarella sgarzare@redhat.com Acked-by: Stefan Hajnoczi stefanha@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/vmw_vsock/virtio_transport.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 936d7eee62d03..6cbc08d82e7fd 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -71,6 +71,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get();
+ if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; }
@@ -495,10 +498,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
virtio_vsock_update_guest_cid(vsock);
- ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -526,8 +525,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0;
-out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -567,7 +564,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex);
vdev->config->del_vqs(vdev); @@ -600,14 +596,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + }
static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); }
From: Stefano Garzarella sgarzare@redhat.com
[ Upstream commit 85965487abc540368393a15491e6e7fcd230039d ]
When the virtio transport device disappear, we should reset all connected sockets in order to inform the users.
Signed-off-by: Stefano Garzarella sgarzare@redhat.com Reviewed-by: Stefan Hajnoczi stefanha@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/vmw_vsock/virtio_transport.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 6cbc08d82e7fd..f66a6010ae075 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -541,6 +541,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work);
+ /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev);
mutex_lock(&vsock->rx_lock);
From: Andy Shevchenko andriy.shevchenko@linux.intel.com
[ Upstream commit 6454368a804c4955ccd116236037536f81e5b1f1 ]
In case of mapping error the DMA addresses are invalid and continuing will screw system memory or potentially something else.
[ 222.480310] dmatest: dma0chan7-copy0: summary 1 tests, 3 failures 6 iops 349 KB/s (0) ... [ 240.912725] check: Corrupted low memory at 00000000c7c75ac9 (2940 phys) = 5656000000000000 [ 240.921998] check: Corrupted low memory at 000000005715a1cd (2948 phys) = 279f2aca5595ab2b [ 240.931280] check: Corrupted low memory at 000000002f4024c0 (2950 phys) = 5e5624f349e793cf ...
Abort any test if mapping failed.
Fixes: 4076e755dbec ("dmatest: convert to dmaengine_unmap_data") Cc: Dan Williams dan.j.williams@intel.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/dma/dmatest.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-)
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index ebe72a4665875..7dd46cf5ed845 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -583,11 +583,9 @@ static int dmatest_func(void *data) srcs[i] = um->addr[i] + src_off; ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - dmaengine_unmap_put(um); result("src mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->to_cnt++; } @@ -602,11 +600,9 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - dmaengine_unmap_put(um); result("dst mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->bidi_cnt++; } @@ -643,12 +639,10 @@ static int dmatest_func(void *data) }
if (!tx) { - dmaengine_unmap_put(um); result("prep error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; }
done->done = false; @@ -657,12 +651,10 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx);
if (dma_submit_error(cookie)) { - dmaengine_unmap_put(um); result("submit error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } dma_async_issue_pending(chan);
@@ -675,16 +667,14 @@ static int dmatest_func(void *data) dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); - failed_tests++; - continue; + goto error_unmap_continue; } else if (status != DMA_COMPLETE) { dmaengine_unmap_put(um); result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; }
dmaengine_unmap_put(um); @@ -727,6 +717,12 @@ static int dmatest_func(void *data) verbose_result("test passed", total_tests, src_off, dst_off, len, 0); } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; } ktime = ktime_sub(ktime_get(), ktime); ktime = ktime_sub(ktime, comparetime);
From: Naresh Kamboju naresh.kamboju@linaro.org
[ Upstream commit 952b72f89ae23b316da8c1021b18d0c388ad6cc4 ]
In selftests the config fragment for netfilter was added as NF_TABLES_INET=y and this patch correct it as CONFIG_NF_TABLES_INET=y
Signed-off-by: Naresh Kamboju naresh.kamboju@linaro.org Acked-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/netfilter/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a85..59caa8f71cd80 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@ CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y
From: Florian Westphal fw@strlen.de
[ Upstream commit 98bfc3414bda335dbd7fec58bde6266f991801d7 ]
Check basic nat/redirect/masquerade for ipv4 and ipv6.
Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/nft_nat.sh | 762 +++++++++++++++++++ 2 files changed, 763 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nft_nat.sh
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb8..c9ff2b47bd1ca 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh
include ../lib.mk diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 0000000000000..8ec76681605cc --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain output { + type nat hook output priority 0; policy accept; + ip6 daddr dead:1::99 dnat to dead:2::99 + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add ip6 dnat hook" + return $ksft_skip + fi + + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain output { + type nat hook output priority 0; policy accept; + ip daddr 10.0.1.99 dnat to 10.0.2.99 + } +} +EOF + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - <<EOF +table inet filter { + counter ns0in {} + counter ns1in {} + counter ns2in {} + + counter ns0out {} + counter ns1out {} + counter ns2out {} + + counter ns0in6 {} + counter ns1in6 {} + counter ns2in6 {} + + counter ns0out6 {} + counter ns1out6 {} + counter ns2out6 {} + + map nsincounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0in", + 10.0.2.1 : "ns0in", + 10.0.1.99 : "ns1in", + 10.0.2.99 : "ns2in" } + } + + map nsincounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0in6", + dead:2::1 : "ns0in6", + dead:1::99 : "ns1in6", + dead:2::99 : "ns2in6" } + } + + map nsoutcounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0out", + 10.0.2.1 : "ns0out", + 10.0.1.99: "ns1out", + 10.0.2.99: "ns2out" } + } + + map nsoutcounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0out6", + dead:2::1 : "ns0out6", + dead:1::99 : "ns1out6", + dead:2::99 : "ns2out6" } + } + + chain input { + type filter hook input priority 0; policy accept; + counter name ip saddr map @nsincounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6 + } + chain output { + type filter hook output priority 0; policy accept; + counter name ip daddr map @nsoutcounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6 + } +} +EOF +done + +sleep 3 +# test basic connectivity +for i in 1 2; do + ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret
From: Martynas Pumputis martynas@weave.works
[ Upstream commit 4e35c1cb9460240e983a01745b5f29fe3a4d8e39 ]
It is possible that two concurrent packets originating from the same socket of a connection-less protocol (e.g. UDP) can end up having different IP_CT_DIR_REPLY tuples which results in one of the packets being dropped.
To illustrate this, consider the following simplified scenario:
1. Packet A and B are sent at the same time from two different threads by same UDP socket. No matching conntrack entry exists yet. Both packets cause allocation of a new conntrack entry. 2. get_unique_tuple gets called for A. No clashing entry found. conntrack entry for A is added to main conntrack table. 3. get_unique_tuple is called for B and will find that the reply tuple of B is already taken by A. It will allocate a new UDP source port for B to resolve the clash. 4. conntrack entry for B cannot be added to main conntrack table because its ORIGINAL direction is clashing with A and the REPLY directions of A and B are not the same anymore due to UDP source port reallocation done in step 3.
This patch modifies nf_conntrack_tuple_taken so it doesn't consider colliding reply tuples if the IP_CT_DIR_ORIGINAL tuples are equal.
[ Florian: simplify patch to not use .allow_clash setting and always ignore identical flows ]
Signed-off-by: Martynas Pumputis martynas@weave.works Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/nf_conntrack_core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 19b3f4fbea520..df1d5618b008f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -855,6 +855,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, }
if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1;
From: Julian Wiedmann jwi@linux.ibm.com
[ Upstream commit afa0c5904ba16d59b0454f7ee4c807dae350f432 ]
The error path in qeth_alloc_qdio_buffers() that takes care of cleaning up the Output Queues is buggy. It first frees the queue, but then calls qeth_clear_outq_buffers() with that very queue struct.
Make the call to qeth_clear_outq_buffers() part of the free action (in the correct order), and while at it fix the naming of the helper.
Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann jwi@linux.ibm.com Reviewed-by: Alexandra Winter wintera@linux.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/net/qeth_core_main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 8f77fc0630ce0..86a02592b982c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2449,11 +2449,12 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) return rc; }
-static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return;
+ qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -2526,10 +2527,8 @@ static int qeth_alloc_qdio_buffers(struct qeth_card *card) card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - } + while (i > 0) + qeth_free_output_queue(card->qdio.out_qs[--i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; out_freepool: @@ -2562,10 +2561,8 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) qeth_free_buffer_pool(card); /* free outbound qdio_qs */ if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_free_output_queue(card->qdio.out_qs[i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; }
From: Jiri Olsa jolsa@redhat.com
[ Upstream commit 59a17706915fe5ea6f711e1f92d4fb706bce07fe ]
When perf is built with the annobin plugin (RHEL8 build) extra symbols are added to its binary:
# nm perf | grep annobin | head -10 0000000000241100 t .annobin_annotate.c 0000000000326490 t .annobin_annotate.c 0000000000249255 t .annobin_annotate.c_end 00000000003283a8 t .annobin_annotate.c_end 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bc3e2 t .annobin_annotate.c_end.unlikely 00000000001bc400 t .annobin_annotate.c_end.unlikely 00000000001bce18 t .annobin_annotate.c.hot 00000000001bce18 t .annobin_annotate.c.hot ...
Those symbols have no use for report or annotation and should be skipped. Moreover they interfere with the DWARF unwind test on the PPC arch, where they are mixed with checked symbols and then the test fails:
# perf test dwarf -v 59: Test dwarf unwind : --- start --- test child forked, pid 8515 unwind: .annobin_dwarf_unwind.c:ip = 0x10dba40dc (0x2740dc) ... got: .annobin_dwarf_unwind.c 0x10dba40dc, expecting test__arch_unwind_sample unwind: failed with 'no error'
The annobin symbols are defined as NOTYPE/LOCAL/HIDDEN:
# readelf -s ./perf | grep annobin | head -1 40: 00000000001bce4f 0 NOTYPE LOCAL HIDDEN 13 .annobin_init.c
They can still pass the check for the label symbol. Adding check for HIDDEN and INTERNAL (as suggested by Nick below) visibility and filter out such symbols.
Just to be awkward, if you are going to ignore STV_HIDDEN symbols then you should probably also ignore STV_INTERNAL ones as well... Annobin does not generate them, but you never know, one day some other tool might create some.
Signed-off-by: Jiri Olsa jolsa@kernel.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Masami Hiramatsu mhiramat@kernel.org Cc: Michael Petlan mpetlan@redhat.com Cc: Namhyung Kim namhyung@kernel.org Cc: Nick Clifton nickc@redhat.com Cc: Peter Zijlstra peterz@infradead.org Link: http://lkml.kernel.org/r/20190128133526.GD15461@krava Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/symbol-elf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index adbc6c02c3aaa..20ba5a9aeae40 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -85,6 +85,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); }
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -109,7 +114,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; }
static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type)
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit 6ab3bc240ade47a0f52bc16d97edd9accbe0024e ]
With a suitably defined "probe:vfs_getname" probe, 'perf trace' can "beautify" its output, so syscalls like open() or openat() can print the "filename" argument instead of just its hex address, like:
$ perf trace -e open -- touch /dev/null [...] 0.590 ( 0.014 ms): touch/18063 open(filename: /dev/null, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 [...]
The output without such beautifier looks like:
0.529 ( 0.011 ms): touch/18075 open(filename: 0xc78cf288, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3
However, when the vfs_getname probe expands to multiple probes and it is not the first one that is hit, the beautifier fails, as following:
0.326 ( 0.010 ms): touch/18072 open(filename: , flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3
Fix it by hooking into all the expanded probes (inlines), now, for instance:
[root@quaco ~]# perf probe -l probe:vfs_getname (on getname_flags:73@fs/namei.c with pathname) probe:vfs_getname_1 (on getname_flags:73@fs/namei.c with pathname) [root@quaco ~]# perf trace -e open* sleep 1 0.010 ( 0.005 ms): sleep/5588 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: RDONLY|CLOEXEC) = 3 0.029 ( 0.006 ms): sleep/5588 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: RDONLY|CLOEXEC) = 3 0.194 ( 0.008 ms): sleep/5588 openat(dfd: CWD, filename: /usr/lib/locale/locale-archive, flags: RDONLY|CLOEXEC) = 3 [root@quaco ~]#
Works, further verified with:
[root@quaco ~]# perf test vfs 65: Use vfs_getname probe to get syscall args filenames : Ok 66: Add vfs_getname probe to get syscall args filenames : Ok 67: Check open filename arg using perf trace + vfs_getname: Ok [root@quaco ~]#
Reported-by: Michael Petlan mpetlan@redhat.com Tested-by: Michael Petlan mpetlan@redhat.com Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Namhyung Kim namhyung@kernel.org Link: https://lkml.kernel.org/n/tip-mv8kolk17xla1smvmp3qabv1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/builtin-trace.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0fd8bfb77f652..6e568d361edf7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2059,19 +2059,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); + bool found = false; + struct perf_evsel *evsel, *tmp; + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, "probe:vfs_getname*", &err);
- if (IS_ERR(evsel)) + if (ret) return false;
- if (perf_evsel__field(evsel, "pathname") == NULL) { + evlist__for_each_entry_safe(evlist, evsel, tmp) { + if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + continue; + + if (perf_evsel__field(evsel, "pathname")) { + evsel->handler = trace__vfs_getname; + found = true; + continue; + } + + list_del_init(&evsel->node); + evsel->evlist = NULL; perf_evsel__delete(evsel); - return false; }
- evsel->handler = trace__vfs_getname; - perf_evlist__add(evlist, evsel); - return true; + return found; }
static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
From: Huacai Chen chenhc@lemote.com
[ Upstream commit e02e07e3127d8aec1f4bcdfb2fc52a2d99b4859e ]
On the Loongson-2G/2H/3A/3B there is a hardware flaw that ll/sc and lld/scd is very weak ordering. We should add sync instructions "before each ll/lld" and "at the branch-target between ll/sc" to workaround. Otherwise, this flaw will cause deadlock occasionally (e.g. when doing heavy load test with LTP).
Below is the explaination of CPU designer:
"For Loongson 3 family, when a memory access instruction (load, store, or prefetch)'s executing occurs between the execution of LL and SC, the success or failure of SC is not predictable. Although programmer would not insert memory access instructions between LL and SC, the memory instructions before LL in program-order, may dynamically executed between the execution of LL/SC, so a memory fence (SYNC) is needed before LL/LLD to avoid this situation.
Since Loongson-3A R2 (3A2000), we have improved our hardware design to handle this case. But we later deduce a rarely circumstance that some speculatively executed memory instructions due to branch misprediction between LL/SC still fall into the above case, so a memory fence (SYNC) at branch-target (if its target is not between LL/SC) is needed for Loongson 3A1000, 3B1500, 3A2000 and 3A3000.
Our processor is continually evolving and we aim to to remove all these workaround-SYNCs around LL/SC for new-come processor."
Here is an example:
Both cpu1 and cpu2 simutaneously run atomic_add by 1 on same atomic var, this bug cause both 'sc' run by two cpus (in atomic_add) succeed at same time('sc' return 1), and the variable is only *added by 1*, sometimes, which is wrong and unacceptable(it should be added by 2).
Why disable fix-loongson3-llsc in compiler? Because compiler fix will cause problems in kernel's __ex_table section.
This patch fix all the cases in kernel, but:
+. the fix at the end of futex_atomic_cmpxchg_inatomic is for branch-target of 'bne', there other cases which smp_mb__before_llsc() and smp_llsc_mb() fix the ll and branch-target coincidently such as atomic_sub_if_positive/ cmpxchg/xchg, just like this one.
+. Loongson 3 does support CONFIG_EDAC_ATOMIC_SCRUB, so no need to touch edac.h
+. local_ops and cmpxchg_local should not be affected by this bug since only the owner can write.
+. mips_atomic_set for syscall.c is deprecated and rarely used, just let it go
Signed-off-by: Huacai Chen chenhc@lemote.com Signed-off-by: Huang Pei huangpei@loongson.cn [paul.burton@mips.com: - Simplify the addition of -mno-fix-loongson3-llsc to cflags, and add a comment describing why it's there. - Make loongson_llsc_mb() a no-op when CONFIG_CPU_LOONGSON3_WORKAROUNDS=n, rather than a compiler memory barrier. - Add a comment describing the bug & how loongson_llsc_mb() helps in asm/barrier.h.] Signed-off-by: Paul Burton paul.burton@mips.com Cc: Ralf Baechle ralf@linux-mips.org Cc: ambrosehua@gmail.com Cc: Steven J . Hill Steven.Hill@cavium.com Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang zhangfx@lemote.com Cc: Zhangjin Wu wuzhangjin@gmail.com Cc: Li Xuefeng lixuefeng@loongson.cn Cc: Xu Chenghua xuchenghua@loongson.cn Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/Kconfig | 15 ++++++++++++++ arch/mips/include/asm/atomic.h | 6 ++++++ arch/mips/include/asm/barrier.h | 36 +++++++++++++++++++++++++++++++++ arch/mips/include/asm/bitops.h | 5 +++++ arch/mips/include/asm/futex.h | 3 +++ arch/mips/include/asm/pgtable.h | 2 ++ arch/mips/loongson64/Platform | 23 +++++++++++++++++++++ arch/mips/mm/tlbex.c | 10 +++++++++ 8 files changed, 100 insertions(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index bb9940c6927e7..47662626a375f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1396,6 +1396,21 @@ config LOONGSON3_ENHANCEMENT please say 'N' here. If you want a high-performance kernel to run on new Loongson 3 machines only, please say 'Y' here.
+config CPU_LOONGSON3_WORKAROUNDS + bool "Old Loongson 3 LLSC Workarounds" + default y if SMP + depends on CPU_LOONGSON3 + help + Loongson 3 processors have the llsc issues which require workarounds. + Without workarounds the system may hang unexpectedly. + + Newer Loongson 3 will fix these issues and no workarounds are needed. + The workarounds have no significant side effect on them but may + decrease the performance of the system so this option should be + disabled unless the kernel is intended to be run on old systems. + + If unsure, please say Y. + config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 0ab176bdb8e81..8ee17565bc781 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -47,6 +47,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set arch=r4000 \n" \ "1: ll %0, %1 # atomic_" #op " \n" \ @@ -86,6 +87,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set arch=r4000 \n" \ "1: ll %1, %2 # atomic_" #op "_return \n" \ @@ -134,6 +136,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set arch=r4000 \n" \ "1: ll %1, %2 # atomic_fetch_" #op " \n" \ @@ -389,6 +392,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set arch=r4000 \n" \ "1: lld %0, %1 # atomic64_" #op " \n" \ @@ -428,6 +432,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set arch=r4000 \n" \ "1: lld %1, %2 # atomic64_" #op "_return\n" \ @@ -477,6 +482,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set arch=r4000 \n" \ "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index a5eb1bb199a7f..b7f6ac5e513c9 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -222,6 +222,42 @@ #define __smp_mb__before_atomic() __smp_mb__before_llsc() #define __smp_mb__after_atomic() smp_llsc_mb()
+/* + * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, + * store or pref) in between an ll & sc can cause the sc instruction to + * erroneously succeed, breaking atomicity. Whilst it's unusual to write code + * containing such sequences, this bug bites harder than we might otherwise + * expect due to reordering & speculation: + * + * 1) A memory access appearing prior to the ll in program order may actually + * be executed after the ll - this is the reordering case. + * + * In order to avoid this we need to place a memory barrier (ie. a sync + * instruction) prior to every ll instruction, in between it & any earlier + * memory access instructions. Many of these cases are already covered by + * smp_mb__before_llsc() but for the remaining cases, typically ones in + * which multiple CPUs may operate on a memory location but ordering is not + * usually guaranteed, we use loongson_llsc_mb() below. + * + * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. + * + * 2) If a conditional branch exists between an ll & sc with a target outside + * of the ll-sc loop, for example an exit upon value mismatch in cmpxchg() + * or similar, then misprediction of the branch may allow speculative + * execution of memory accesses from outside of the ll-sc loop. + * + * In order to avoid this we need a memory barrier (ie. a sync instruction) + * at each affected branch target, for which we also use loongson_llsc_mb() + * defined below. + * + * This case affects all current Loongson 3 CPUs. + */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */ +#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") +#else +#define loongson_llsc_mb() do { } while (0) +#endif + #include <asm-generic/barrier.h>
#endif /* __ASM_BARRIER_H */ diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index fa57cef12a466..38a162d11b7b8 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -68,6 +68,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m)); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # set_bit \n" @@ -78,6 +79,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set "MIPS_ISA_ARCH_LEVEL" \n" @@ -120,6 +122,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (~(1UL << bit))); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # clear_bit \n" @@ -130,6 +133,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set "MIPS_ISA_ARCH_LEVEL" \n" @@ -188,6 +192,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp;
+ loongson_llsc_mb(); do { __asm__ __volatile__( " .set "MIPS_ISA_ARCH_LEVEL" \n" diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index a9e61ea54ca96..0a62a91b592d6 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -50,6 +50,7 @@ "i" (-EFAULT) \ : "memory"); \ } else if (cpu_has_llsc) { \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ @@ -162,6 +163,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { + loongson_llsc_mb(); __asm__ __volatile__( "# futex_atomic_cmpxchg_inatomic \n" " .set push \n" @@ -190,6 +192,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); + loongson_llsc_mb(); } else return -ENOSYS;
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 9e9e94415d08f..aab7b382a0623 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -229,6 +229,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); } else if (kernel_uses_llsc) { + loongson_llsc_mb(); __asm__ __volatile__ ( " .set "MIPS_ISA_ARCH_LEVEL" \n" " .set push \n" @@ -244,6 +245,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) " .set mips0 \n" : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); + loongson_llsc_mb(); } #else /* !CONFIG_SMP */ if (pte_none(*buddy)) diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 0fce4608aa886..c1a4d4dc46655 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS endif
cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap + +# +# Some versions of binutils, not currently mainline as of 2019/02/04, support +# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction +# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a +# description). +# +# We disable this in order to prevent the assembler meddling with the +# instruction that labels refer to, ie. if we label an ll instruction: +# +# 1: ll v0, 0(a0) +# +# ...then with the assembler fix applied the label may actually point at a sync +# instruction inserted by the assembler, and if we were using the label in an +# exception table the table would no longer contain the address of the ll +# instruction. +# +# Avoid this by explicitly disabling that assembler behaviour. If upstream +# binutils does not merge support for the flag then we can revisit & remove +# this later - for now it ensures vendor toolchains don't cause problems. +# +cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) + # # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a # as MIPS64 R2; older versions as just R1. This leaves the possibility open diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 2da5649fc545f..0026c77351347 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -931,6 +931,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * to mimic that here by taking a load/istream page * fault. */ + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); uasm_i_jr(p, ptr);
@@ -1637,6 +1639,8 @@ static void iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) { #ifdef CONFIG_SMP + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); # ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_lld(p, pte, 0, ptr); @@ -2218,6 +2222,8 @@ static void build_r4000_tlb_load_handler(void) #endif
uasm_l_nopage_tlbl(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_0 & 1) { @@ -2273,6 +2279,8 @@ static void build_r4000_tlb_store_handler(void) #endif
uasm_l_nopage_tlbs(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { @@ -2329,6 +2337,8 @@ static void build_r4000_tlb_modify_handler(void) #endif
uasm_l_nopage_tlbm(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) {
From: Jun-Ru Chang jrjang@realtek.com
[ Upstream commit 2b424cfc69728224fcb5fad138ea7260728e0901 ]
Patch (b6c7a324df37b "MIPS: Fix get_frame_info() handling of microMIPS function size.") introduces additional function size check for microMIPS by only checking insn between ip and ip + func_size. However, func_size in get_frame_info() is always 0 if KALLSYMS is not enabled. This causes get_frame_info() to return immediately without calculating correct frame_size, which in turn causes "Can't analyze schedule() prologue" warning messages at boot time.
This patch removes func_size check, and let the frame_size check run up to 128 insns for both MIPS and microMIPS.
Signed-off-by: Jun-Ru Chang jrjang@realtek.com Signed-off-by: Tony Wu tonywu@realtek.com Signed-off-by: Paul Burton paul.burton@mips.com Fixes: b6c7a324df37b ("MIPS: Fix get_frame_info() handling of microMIPS function size.") Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: macro@mips.com Cc: yamada.masahiro@socionext.com Cc: peterz@infradead.org Cc: mingo@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/kernel/process.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 1cc133e7026fd..fffd031dc6b61 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -344,7 +344,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip, *ip_end; + union mips_instruction insn, *ip; const unsigned int max_insns = 128; unsigned int last_insn_size = 0; unsigned int i; @@ -356,10 +356,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err;
- ip_end = (void *)ip + info->func_size; - - for (i = 0; i < max_insns && ip < ip_end; i++) { + for (i = 0; i < max_insns; i++) { ip = (void *)ip + last_insn_size; + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.halfword[0] = 0; insn.halfword[1] = ip->halfword[0];
From: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp
[ Upstream commit 43636c804df0126da669c261fc820fb22f62bfc2 ]
When something let __find_get_block_slow() hit all_mapped path, it calls printk() for 100+ times per a second. But there is no need to print same message with such high frequency; it is just asking for stall warning, or at least bloating log files.
[ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.873324][T15342] b_state=0x00000029, b_size=512 [ 399.878403][T15342] device loop0 blocksize: 4096 [ 399.883296][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.890400][T15342] b_state=0x00000029, b_size=512 [ 399.895595][T15342] device loop0 blocksize: 4096 [ 399.900556][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.907471][T15342] b_state=0x00000029, b_size=512 [ 399.912506][T15342] device loop0 blocksize: 4096
This patch reduces frequency to up to once per a second, in addition to concatenating three lines into one.
[ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8, b_state=0x00000029, b_size=512, device loop0 blocksize: 4096
Signed-off-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Reviewed-by: Jan Kara jack@suse.cz Cc: Dmitry Vyukov dvyukov@google.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/buffer.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c index 5d8f496d624ed..e0d46d47e3587 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -207,6 +207,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct buffer_head *head; struct page *page; int all_mapped = 1; + static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); @@ -234,15 +235,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ - if (all_mapped) { - printk("__find_get_block_slow() failed. " - "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, - (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%zu\n", - bh->b_state, bh->b_size); - printk("device %pg blocksize: %d\n", bdev, - 1 << bd_inode->i_blkbits); + ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); + if (all_mapped && __ratelimit(&last_warned)) { + printk("__find_get_block_slow() failed. block=%llu, " + "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " + "device %pg blocksize: %d\n", + (unsigned long long)block, + (unsigned long long)bh->b_blocknr, + bh->b_state, bh->b_size, bdev, + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock);
linux-stable-mirror@lists.linaro.org