From: Xi Wang wangxi11@huawei.com
[ Upstream commit 60c3becfd1a138fdcfe48f2a5ef41ef0078d481e ]
When run perftest in many times, the system will report a BUG as follows:
BUG: Bad rss-counter state mm:(____ptrval____) idx:0 val:-1 BUG: Bad rss-counter state mm:(____ptrval____) idx:1 val:1
We tested with different kernel version and found it started from the the following commit:
commit d10bcf947a3e ("RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs")
In this commit, the sg->offset is always 0 when sg_set_page() is called in ib_umem_get() and the drivers are not allowed to change the sgl, otherwise it will get bad page descriptor when unfolding SGEs in __ib_umem_release() as sg_page_count() will get wrong result while sgl->offset is not 0.
However, there is a weird sgl usage in the current hns driver, the driver modified sg->offset after calling ib_umem_get(), which caused we iterate past the wrong number of pages in for_each_sg_page iterator.
This patch fixes it by correcting the non-standard sgl usage found in the hns_roce_db_map_user() function.
Fixes: d10bcf947a3e ("RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs") Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") Link: https://lore.kernel.org/r/1562808737-45723-1-git-send-email-oulijun@huawei.c... Signed-off-by: Xi Wang wangxi11@huawei.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/hns/hns_roce_db.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 0c6c1fe87705c..d60453e98db7c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -12,13 +12,15 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db) { + unsigned long page_addr = virt & PAGE_MASK; struct hns_roce_user_db_page *page; + unsigned int offset; int ret = 0;
mutex_lock(&context->page_mutex);
list_for_each_entry(page, &context->page_list, list) - if (page->user_virt == (virt & PAGE_MASK)) + if (page->user_virt == page_addr) goto found;
page = kmalloc(sizeof(*page), GFP_KERNEL); @@ -28,8 +30,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, }
refcount_set(&page->refcount, 1); - page->user_virt = (virt & PAGE_MASK); - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->user_virt = page_addr; + page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); @@ -39,10 +41,9 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, list_add(&page->list, &context->page_list);
found: - db->dma = sg_dma_address(page->umem->sg_head.sgl) + - (virt & ~PAGE_MASK); - page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK; - db->virt_addr = sg_virt(page->umem->sg_head.sgl); + offset = virt - page_addr; + db->dma = sg_dma_address(page->umem->sg_head.sgl) + offset; + db->virt_addr = sg_virt(page->umem->sg_head.sgl) + offset; db->u.user_page = page; refcount_inc(&page->refcount);
From: Chuhong Yuan hslester96@gmail.com
[ Upstream commit b7f406bb883ba7ac3222298f6b44cebc4cfe2dde ]
Memory allocated by kvzalloc should not be freed by kfree(), use kvfree() instead.
Fixes: 813e90b1aeaa ("IB/mlx5: Add advise_mr() support") Link: https://lore.kernel.org/r/20190717082101.14196-1-hslester96@gmail.com Signed-off-by: Chuhong Yuan hslester96@gmail.com Reviewed-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/mlx5/odp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91507a2e92900..f6e5351ba4d50 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1765,7 +1765,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list, w->num_sge, 0); - kfree(w); + kvfree(w); }
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1807,7 +1807,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (valid_req) queue_work(system_unbound_wq, &work->work); else - kfree(work); + kvfree(work);
srcu_read_unlock(&dev->mr_srcu, srcu_key);
From: Codrin Ciubotariu codrin.ciubotariu@microchip.com
[ Upstream commit 1573eebeaa8055777eb753f9b4d1cbe653380c38 ]
In clk_generated_determine_rate(), if the divisor is greater than GENERATED_MAX_DIV + 1, then the wrong best_rate will be returned. If clk_generated_set_rate() will be called later with this wrong rate, it will return -EINVAL, so the generated clock won't change its value. Do no let the divisor be greater than GENERATED_MAX_DIV + 1.
Fixes: 8c7aa6328947 ("clk: at91: clk-generated: remove useless divisor loop") Signed-off-by: Codrin Ciubotariu codrin.ciubotariu@microchip.com Acked-by: Nicolas Ferre nicolas.ferre@microchip.com Acked-by: Ludovic Desroches ludovic.desroches@microchip.com Signed-off-by: Stephen Boyd sboyd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/at91/clk-generated.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 44db83a6d01c2..44a46dcc0518b 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -141,6 +141,8 @@ static int clk_generated_determine_rate(struct clk_hw *hw, continue;
div = DIV_ROUND_CLOSEST(parent_rate, req->rate); + if (div > GENERATED_MAX_DIV + 1) + div = GENERATED_MAX_DIV + 1;
clk_generated_best_diff(req, parent, parent_rate, div, &best_diff, &best_rate);
From: Chunyan Zhang chunyan.zhang@unisoc.com
[ Upstream commit c9a67cbb5189e966c70451562b2ca4c3876ab546 ]
Make REGMAP_MMIO selected to avoid undefined reference to regmap symbols.
Fixes: d41f59fd92f2 ("clk: sprd: Add common infrastructure") Signed-off-by: Chunyan Zhang chunyan.zhang@unisoc.com Signed-off-by: Stephen Boyd sboyd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/sprd/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/clk/sprd/Kconfig b/drivers/clk/sprd/Kconfig index 91d3d721c801e..3c219af251001 100644 --- a/drivers/clk/sprd/Kconfig +++ b/drivers/clk/sprd/Kconfig @@ -3,6 +3,7 @@ config SPRD_COMMON_CLK tristate "Clock support for Spreadtrum SoCs" depends on ARCH_SPRD || COMPILE_TEST default ARCH_SPRD + select REGMAP_MMIO
if SPRD_COMMON_CLK
From: Geert Uytterhoeven geert+renesas@glider.be
[ Upstream commit e1f1ae8002e4b06addc52443fcd975bbf554ae92 ]
The module reset code in the Renesas CPG/MSSR driver uses read-modify-write (RMW) operations to write to a Software Reset Register (SRCRn), and simple writes to write to a Software Reset Clearing Register (SRSTCLRn), as was mandated by the R-Car Gen2 and Gen3 Hardware User's Manuals.
However, this may cause a race condition when two devices are reset in parallel: if the reset for device A completes in the middle of the RMW operation for device B, device A may be reset again, causing subtle failures (e.g. i2c timeouts):
thread A thread B -------- --------
val = SRCRn val |= bit A SRCRn = val
delay
val = SRCRn (bit A is set)
SRSTCLRn = bit A (bit A in SRCRn is cleared)
val |= bit B SRCRn = val (bit A and B are set)
This can be reproduced on e.g. Salvator-XS using:
$ while true; do i2cdump -f -y 4 0x6A b > /dev/null; done & $ while true; do i2cdump -f -y 2 0x10 b > /dev/null; done &
i2c-rcar e6510000.i2c: error -110 : 40000002 i2c-rcar e66d8000.i2c: error -110 : 40000002
According to the R-Car Gen3 Hardware Manual Errata for Rev. 0.80 of Feb 28, 2018, reflected in Rev. 1.00 of the R-Car Gen3 Hardware User's Manual, writes to SRCRn do not require read-modify-write cycles.
Note that the R-Car Gen2 Hardware User's Manual has not been updated yet, and still says a read-modify-write sequence is required. According to the hardware team, the reset hardware block is the same on both R-Car Gen2 and Gen3, though.
Hence fix the issue by replacing the read-modify-write operations on SRCRn by simple writes.
Reported-by: Yao Lihua Lihua.Yao@desay-svautomotive.com Fixes: 6197aa65c4905532 ("clk: renesas: cpg-mssr: Add support for reset control") Signed-off-by: Geert Uytterhoeven geert+renesas@glider.be Tested-by: Linh Phung linh.phung.jy@renesas.com Signed-off-by: Stephen Boyd sboyd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/renesas/renesas-cpg-mssr.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c index 0201809bbd377..9dfa28d6fd9f9 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.c +++ b/drivers/clk/renesas/renesas-cpg-mssr.c @@ -576,17 +576,11 @@ static int cpg_mssr_reset(struct reset_controller_dev *rcdev, unsigned int reg = id / 32; unsigned int bit = id % 32; u32 bitmask = BIT(bit); - unsigned long flags; - u32 value;
dev_dbg(priv->dev, "reset %u%02u\n", reg, bit);
/* Reset module */ - spin_lock_irqsave(&priv->rmw_lock, flags); - value = readl(priv->base + SRCR(reg)); - value |= bitmask; - writel(value, priv->base + SRCR(reg)); - spin_unlock_irqrestore(&priv->rmw_lock, flags); + writel(bitmask, priv->base + SRCR(reg));
/* Wait for at least one cycle of the RCLK clock (@ ca. 32 kHz) */ udelay(35); @@ -603,16 +597,10 @@ static int cpg_mssr_assert(struct reset_controller_dev *rcdev, unsigned long id) unsigned int reg = id / 32; unsigned int bit = id % 32; u32 bitmask = BIT(bit); - unsigned long flags; - u32 value;
dev_dbg(priv->dev, "assert %u%02u\n", reg, bit);
- spin_lock_irqsave(&priv->rmw_lock, flags); - value = readl(priv->base + SRCR(reg)); - value |= bitmask; - writel(value, priv->base + SRCR(reg)); - spin_unlock_irqrestore(&priv->rmw_lock, flags); + writel(bitmask, priv->base + SRCR(reg)); return 0; }
From: Christoph Hellwig hch@lst.de
[ Upstream commit 66d7780f18eae0232827fcffeaded39a6a168236 ]
Check that the pfn returned from arch_dma_coherent_to_pfn refers to a valid page and reject the mmap / get_sgtable requests otherwise.
Based on the arm implementation of the mmap and get_sgtable methods.
Signed-off-by: Christoph Hellwig hch@lst.de Tested-by: Vignesh Raghavendra vigneshr@ti.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/dma/mapping.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f7afdadb6770b..3401382bbca2f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -116,11 +116,16 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, int ret;
if (!dev_is_dma_coherent(dev)) { + unsigned long pfn; + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) return -ENXIO;
- page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr, - dma_addr)); + /* If the PFN is not valid, we do not have a struct page */ + pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + if (!pfn_valid(pfn)) + return -ENXIO; + page = pfn_to_page(pfn); } else { page = virt_to_page(cpu_addr); } @@ -170,7 +175,11 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, if (!dev_is_dma_coherent(dev)) { if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) return -ENXIO; + + /* If the PFN is not valid, we do not have a struct page */ pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + if (!pfn_valid(pfn)) + return -ENXIO; } else { pfn = page_to_pfn(virt_to_page(cpu_addr)); }
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit e3cacb73e626d885b8cf24103fed0ae26518e3c4 ]
Assembly entry/return abstraction change didn't add asmmacro.h include statement to coprocessor.S, resulting in references to undefined macros abi_entry and abi_ret on cores that define XTENSA_HAVE_COPROCESSORS. Fix that by including asm/asmmacro.h from the coprocessor.S.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/xtensa/kernel/coprocessor.S | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index 92bf24a9da929..e723129c36688 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -14,6 +14,7 @@
#include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/asmmacro.h> #include <asm/processor.h> #include <asm/coprocessor.h> #include <asm/thread_info.h>
Hello,
On Tue, Aug 6, 2019 at 2:33 PM Sasha Levin sashal@kernel.org wrote:
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit e3cacb73e626d885b8cf24103fed0ae26518e3c4 ]
Assembly entry/return abstraction change didn't add asmmacro.h include statement to coprocessor.S, resulting in references to undefined macros abi_entry and abi_ret on cores that define XTENSA_HAVE_COPROCESSORS. Fix that by including asm/asmmacro.h from the coprocessor.S.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org
arch/xtensa/kernel/coprocessor.S | 1 + 1 file changed, 1 insertion(+)
This fix is only relevant to 5.3, as it fixes a bug introduced in 5.3-rc1.
On Tue, Aug 06, 2019 at 02:55:29PM -0700, Max Filippov wrote:
Hello,
On Tue, Aug 6, 2019 at 2:33 PM Sasha Levin sashal@kernel.org wrote:
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit e3cacb73e626d885b8cf24103fed0ae26518e3c4 ]
Assembly entry/return abstraction change didn't add asmmacro.h include statement to coprocessor.S, resulting in references to undefined macros abi_entry and abi_ret on cores that define XTENSA_HAVE_COPROCESSORS. Fix that by including asm/asmmacro.h from the coprocessor.S.
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org
arch/xtensa/kernel/coprocessor.S | 1 + 1 file changed, 1 insertion(+)
This fix is only relevant to 5.3, as it fixes a bug introduced in 5.3-rc1.
Now dropped, thank you.
-- Thanks, Sasha
From: Jean Delvare jdelvare@suse.de
[ Upstream commit edbfe83def34153a05439ecb3352ae0bb65024de ]
Only first MODULE_SOFTDEP statement is handled per module. Multiple dependencies must be expressed in a single statement.
Signed-off-by: Jean Delvare jdelvare@suse.de Cc: "Enrico Weigelt, metux IT consult" info@metux.net Cc: Darren Hart dvhart@infradead.org Cc: Andy Shevchenko andy@infradead.org [andy: massaged commit message] Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/pcengines-apuv2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c index c1ca931e1fab8..7a8cbfb5d2135 100644 --- a/drivers/platform/x86/pcengines-apuv2.c +++ b/drivers/platform/x86/pcengines-apuv2.c @@ -255,6 +255,4 @@ MODULE_DESCRIPTION("PC Engines APUv2/APUv3 board GPIO/LED/keys driver"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(dmi, apu_gpio_dmi_table); MODULE_ALIAS("platform:pcengines-apuv2"); -MODULE_SOFTDEP("pre: platform:" AMD_FCH_GPIO_DRIVER_NAME); -MODULE_SOFTDEP("pre: platform:leds-gpio"); -MODULE_SOFTDEP("pre: platform:gpio_keys_polled"); +MODULE_SOFTDEP("pre: platform:" AMD_FCH_GPIO_DRIVER_NAME " platform:leds-gpio platform:gpio_keys_polled");
From: Rajneesh Bhardwaj rajneesh.bhardwaj@linux.intel.com
[ Upstream commit 66013e8ec6850f9c62df6aea555fe7668e84dc3c ]
Ice Lake Neural Network Processor for deep learning inference a.k.a. ICL-NNPI can re-use Ice Lake Mobile regmap to enable Intel PMC Core driver on it.
Cc: Darren Hart dvhart@infradead.org Cc: Andy Shevchenko andy@infradead.org Cc: platform-driver-x86@vger.kernel.org Link: https://lkml.org/lkml/2019/6/5/1034 Signed-off-by: Rajneesh Bhardwaj rajneesh.bhardwaj@linux.intel.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/intel_pmc_core.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index 1d902230ba611..be6cda89dcf5b 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -815,6 +815,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { INTEL_CPU_FAM6(KABYLAKE_DESKTOP, spt_reg_map), INTEL_CPU_FAM6(CANNONLAKE_MOBILE, cnp_reg_map), INTEL_CPU_FAM6(ICELAKE_MOBILE, icl_reg_map), + INTEL_CPU_FAM6(ICELAKE_NNPI, icl_reg_map), {} };
From: Christoph Hellwig hch@lst.de
[ Upstream commit 2bcbeaefde2f0384d6ad351c151b1a9fe7791a0a ]
We should not have two different error codes for the same condition. EAGAIN must be reserved for the FAULT_FLAG_ALLOW_RETRY retry case and signals to the caller that the mmap_sem has been unlocked.
Use EBUSY for the !valid case so that callers can get the locking right.
Link: https://lore.kernel.org/r/20190724065258.16603-2-hch@lst.de Tested-by: Ralph Campbell rcampbell@nvidia.com Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: Ralph Campbell rcampbell@nvidia.com Reviewed-by: Jason Gunthorpe jgg@mellanox.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com [jgg: elaborated commit message] Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- Documentation/vm/hmm.rst | 2 +- mm/hmm.c | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index 7cdf7282e0229..65b6c1109cc81 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -231,7 +231,7 @@ respect in order to keep things properly synchronized. The usage pattern is:: ret = hmm_range_snapshot(&range); if (ret) { up_read(&mm->mmap_sem); - if (ret == -EAGAIN) { + if (ret == -EBUSY) { /* * No need to check hmm_range_wait_until_valid() return value * on retry we will get proper error with hmm_range_snapshot() diff --git a/mm/hmm.c b/mm/hmm.c index 4c405dfbd2b3d..27dd9a8816272 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -995,7 +995,7 @@ EXPORT_SYMBOL(hmm_range_unregister); * @range: range * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid * permission (for instance asking for write and range is read only), - * -EAGAIN if you need to retry, -EFAULT invalid (ie either no valid + * -EBUSY if you need to retry, -EFAULT invalid (ie either no valid * vma or it is illegal to access that range), number of valid pages * in range->pfns[] (from range start address). * @@ -1019,7 +1019,7 @@ long hmm_range_snapshot(struct hmm_range *range) do { /* If range is no longer valid force retry. */ if (!range->valid) - return -EAGAIN; + return -EBUSY;
vma = find_vma(hmm->mm, start); if (vma == NULL || (vma->vm_flags & device_vma)) @@ -1117,10 +1117,8 @@ long hmm_range_fault(struct hmm_range *range, bool block)
do { /* If range is no longer valid force retry. */ - if (!range->valid) { - up_read(&hmm->mm->mmap_sem); - return -EAGAIN; - } + if (!range->valid) + return -EBUSY;
vma = find_vma(hmm->mm, start); if (vma == NULL || (vma->vm_flags & device_vma))
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit 09e088a4903bd0dd911b4f1732b250130cdaffed ]
Fixes gcc '-Wunused-but-set-variable' warning:
drivers/xen/xen-pciback/conf_space_capability.c: In function pm_ctrl_write: drivers/xen/xen-pciback/conf_space_capability.c:119:25: warning: variable old_state set but not used [-Wunused-but-set-variable]
It is never used so can be removed.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: YueHaibing yuehaibing@huawei.com Reviewed-by: Boris Ostrovsky boris.ostrovsky@oracle.com Signed-off-by: Juergen Gross jgross@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/xen/xen-pciback/conf_space_capability.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 73427d8e01161..e5694133ebe57 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -116,13 +116,12 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, { int err; u16 old_value; - pci_power_t new_state, old_state; + pci_power_t new_state;
err = pci_read_config_word(dev, offset, &old_value); if (err) goto out;
- old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
new_value &= PM_OK_BITS;
From: Nianyao Tang tangnianyao@huawei.com
[ Upstream commit 34f8eb92ca053cbba2887bb7e4dbf2b2cd6eb733 ]
In its_vpe_init, when its_alloc_vpe_table fails, we should free vpt_page allocated just before, instead of vpe->vpt_page. Let's fix it.
Cc: Thomas Gleixner tglx@linutronix.de Cc: Jason Cooper jason@lakedaemon.net Cc: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Nianyao Tang tangnianyao@huawei.com Signed-off-by: Shaokun Zhang zhangshaokun@hisilicon.com Signed-off-by: Marc Zyngier maz@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 35500801dc2b5..20e5482d91b94 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3010,7 +3010,7 @@ static int its_vpe_init(struct its_vpe *vpe)
if (!its_alloc_vpe_table(vpe_id)) { its_vpe_id_free(vpe_id); - its_free_pending_table(vpe->vpt_page); + its_free_pending_table(vpt_page); return -ENOMEM; }
From: Lucas Stach l.stach@pengutronix.de
[ Upstream commit 9a446ef08f3bfc0c3deb9c6be840af2528ef8cf8 ]
The GPCv2 is a stacked IRQ controller below the ARM GIC. It doesn't care about the IRQ type itself, but needs to forward the type to the parent IRQ controller, so this one can be configured correctly.
Signed-off-by: Lucas Stach l.stach@pengutronix.de Signed-off-by: Marc Zyngier maz@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-imx-gpcv2.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index bf2237ac5d091..4f74c15c47555 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -131,6 +131,7 @@ static struct irq_chip gpcv2_irqchip_data_chip = { .irq_unmask = imx_gpcv2_irq_unmask, .irq_set_wake = imx_gpcv2_irq_set_wake, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif
From: Jaegeuk Kim jaegeuk@kernel.org
[ Upstream commit 543b8c468f55f27f3c0178a22a91a51aabbbc428 ]
f2fs_allocate_data_block() invalidates old block address and enable new block address. Then, if we try to read old block by f2fs_submit_page_bio(), it will give WARN due to reading invalid blocks.
Let's make the order sanely back.
Reviewed-by: Chao Yu yuchao0@huawei.com Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/gc.c | 70 +++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 36 deletions(-)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 963fb4571fd98..bb6fd5a506d39 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -794,6 +794,29 @@ static int move_data_block(struct inode *inode, block_t bidx, if (lfs_mode) down_write(&fio.sbi->io_order_lock);
+ mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), + fio.old_blkaddr, false); + if (!mpage) + goto up_out; + + fio.encrypted_page = mpage; + + /* read source block in mpage */ + if (!PageUptodate(mpage)) { + err = f2fs_submit_page_bio(&fio); + if (err) { + f2fs_put_page(mpage, 1); + goto up_out; + } + lock_page(mpage); + if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || + !PageUptodate(mpage))) { + err = -EIO; + f2fs_put_page(mpage, 1); + goto up_out; + } + } + f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false);
@@ -801,44 +824,18 @@ static int move_data_block(struct inode *inode, block_t bidx, newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; - goto recover_block; - } - - mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), - fio.old_blkaddr, FGP_LOCK, GFP_NOFS); - if (mpage) { - bool updated = false; - - if (PageUptodate(mpage)) { - memcpy(page_address(fio.encrypted_page), - page_address(mpage), PAGE_SIZE); - updated = true; - } f2fs_put_page(mpage, 1); - invalidate_mapping_pages(META_MAPPING(fio.sbi), - fio.old_blkaddr, fio.old_blkaddr); - if (updated) - goto write_page; - } - - err = f2fs_submit_page_bio(&fio); - if (err) - goto put_page_out; - - /* write page */ - lock_page(fio.encrypted_page); - - if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { - err = -EIO; - goto put_page_out; - } - if (unlikely(!PageUptodate(fio.encrypted_page))) { - err = -EIO; - goto put_page_out; + goto recover_block; }
-write_page: + /* write target block */ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); + memcpy(page_address(fio.encrypted_page), + page_address(mpage), PAGE_SIZE); + f2fs_put_page(mpage, 1); + invalidate_mapping_pages(META_MAPPING(fio.sbi), + fio.old_blkaddr, fio.old_blkaddr); + set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -869,11 +866,12 @@ static int move_data_block(struct inode *inode, block_t bidx, put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: - if (lfs_mode) - up_write(&fio.sbi->io_order_lock); if (err) f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, true, true); +up_out: + if (lfs_mode) + up_write(&fio.sbi->io_order_lock); put_out: f2fs_put_dnode(&dn); out:
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit 7ee526152db7a75d7b8713346dac76ffc3662b29 ]
In addition to _IOW() and _IOR(), to handle this case:
#define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len)
That will happen in the next sync of this header file.
Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@kernel.org Cc: Luis Cláudio Gonçalves lclaudio@redhat.com Cc: Namhyung Kim namhyung@kernel.org Link: https://lkml.kernel.org/n/tip-3br5e4t64e4lp0goo84che3s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/trace/beauty/usbdevfs_ioctl.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/tools/perf/trace/beauty/usbdevfs_ioctl.sh b/tools/perf/trace/beauty/usbdevfs_ioctl.sh index 930b80f422e83..aa597ae537470 100755 --- a/tools/perf/trace/beauty/usbdevfs_ioctl.sh +++ b/tools/perf/trace/beauty/usbdevfs_ioctl.sh @@ -3,10 +3,13 @@
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+# also as: +# #define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len) + printf "static const char *usbdevfs_ioctl_cmds[] = {\n" -regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*" -egrep $regex ${header_dir}/usbdevice_fs.h | egrep -v 'USBDEVFS_\w+32[[:space:]]' | \ - sed -r "s/$regex/\2 \1/g" | \ +regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)((\w+))?[[:space:]]+_IO[CWR]{0,2}([[:space:]]*(_IOC_\w+,[[:space:]]*)?'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*" +egrep "$regex" ${header_dir}/usbdevice_fs.h | egrep -v 'USBDEVFS_\w+32[[:space:]]' | \ + sed -r "s/$regex/\4 \1/g" | \ sort | xargs printf "\t[%s] = "%s",\n" printf "};\n\n" printf "#if 0\n"
From: Vince Weaver vincent.weaver@maine.edu
[ Upstream commit 7622236ceb167aa3857395f9bdaf871442aa467e ]
So I have been having lots of trouble with hand-crafted perf.data files causing segfaults and the like, so I have started fuzzing the perf tool.
First issue found:
If f_header.attr_size is 0 in the perf.data file, then perf will crash with a divide-by-zero error.
Committer note:
Added a pr_err() to tell the user why the command failed.
Signed-off-by: Vince Weaver vincent.weaver@maine.edu Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Namhyung Kim namhyung@kernel.org Cc: Peter Zijlstra peterz@infradead.org Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1907231100440.14532@macbook-air Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/header.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b82d4577d9694..fd543f209bd0a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3478,6 +3478,13 @@ int perf_session__read_header(struct perf_session *session) data->file.path); }
+ if (f_header.attr_size == 0) { + pr_err("ERROR: The %s file's attr size field is 0 which is unexpected.\n" + "Was the 'perf record' command properly terminated?\n", + data->file.path); + return -EINVAL; + } + nr_attrs = f_header.attrs.size / f_header.attr_size; lseek(fd, f_header.attrs.offset, SEEK_SET);
From: Numfor Mbiziwo-Tiapo nums@google.com
[ Upstream commit 20f9781f491360e7459c589705a2e4b1f136bee9 ]
When building our local version of perf with MSAN (Memory Sanitizer) and running the perf record command, MSAN throws a use of uninitialized value warning in "tools/perf/util/util.c:333:6".
This warning stems from the "buf" variable being passed into "write". It originated as the variable "ev" with the type union perf_event* defined in the "perf_event__synthesize_attr" function in "tools/perf/util/header.c".
In the "perf_event__synthesize_attr" function they allocate space with a malloc call using ev, then go on to only assign some of the member variables before passing "ev" on as a parameter to the "process" function therefore "ev" contains uninitialized memory. Changing the malloc call to zalloc to initialize all the members of "ev" which gets rid of the warning.
To reproduce this warning, build perf by running: make -C tools/perf CLANG=1 CC=clang EXTRA_CFLAGS="-fsanitize=memory\ -fsanitize-memory-track-origins"
(Additionally, llvm might have to be installed and clang might have to be specified as the compiler - export CC=/usr/bin/clang)
then running: tools/perf/perf record -o - ls / | tools/perf/perf --no-pager annotate\ -i - --stdio
Please see the cover letter for why false positive warnings may be generated.
Signed-off-by: Numfor Mbiziwo-Tiapo nums@google.com Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Ian Rogers irogers@google.com Cc: Jiri Olsa jolsa@redhat.com Cc: Mark Drayton mbd@fb.com Cc: Namhyung Kim namhyung@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Song Liu songliubraving@fb.com Cc: Stephane Eranian eranian@google.com Link: http://lkml.kernel.org/r/20190724234500.253358-2-nums@google.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fd543f209bd0a..1156ebda47c3f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3565,7 +3565,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, size += sizeof(struct perf_event_header); size += ids * sizeof(u64);
- ev = malloc(size); + ev = zalloc(size);
if (ev == NULL) return -ENOMEM;
From: Michal Kalderon michal.kalderon@marvell.com
[ Upstream commit 15fe6a8dcc3b48358c28e17b485fc837f9605ec4 ]
There was a place holder for hca_type and vendor was returned in hca_rev. Fix the hca_rev to return the hw revision and fix the hca_type to return an informative string representing the hca.
Signed-off-by: Michal Kalderon michal.kalderon@marvell.com Link: https://lore.kernel.org/r/20190728111338.21930-1-michal.kalderon@marvell.com Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/qedr/main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 083c2c00a8e91..dfdd1e16de7f5 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -125,14 +125,20 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); } static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET"); + struct qedr_dev *dev = + rdma_device_to_drv_device(device, struct qedr_dev, ibdev); + + return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", + dev->pdev->device, + rdma_protocol_iwarp(&dev->ibdev, 1) ? + "iWARP" : "RoCE"); } static DEVICE_ATTR_RO(hca_type);
From: Yuki Tsunashima ytsunashima@jp.adit-jv.com
[ Upstream commit 37151a41df800493cfcbbef4f7208ffe04feb959 ]
lost wakeup can occur after enabling irq, therefore put task into interruptible before enabling interrupts,
without this change, task can be put to sleep and snd_pcm_drain will delay
Fixes: f2b3614cefb6 ("ALSA: PCM - Don't check DMA time-out too shortly") Signed-off-by: Yuki Tsunashima ytsunashima@jp.adit-jv.com Signed-off-by: Suresh Udipi sudipi@jp.adit-jv.com [ported from 4.9] Signed-off-by: Adam Miartus amiartus@de.adit-jv.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/core/pcm_native.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 12dd9b318db18..703857aab00fc 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1873,6 +1873,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, if (!to_check) break; /* all drained */ init_waitqueue_entry(&wait, current); + set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&to_check->sleep, &wait); snd_pcm_stream_unlock_irq(substream); if (runtime->no_period_wakeup) @@ -1885,7 +1886,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, } tout = msecs_to_jiffies(tout * 1000); } - tout = schedule_timeout_interruptible(tout); + tout = schedule_timeout(tout);
snd_pcm_stream_lock_irq(substream); group = snd_pcm_stream_group_ref(substream);
From: Kees Cook keescook@chromium.org
[ Upstream commit 71d6c505b4d9e6f76586350450e785e3d452b346 ]
Jeffrin reported a KASAN issue:
BUG: KASAN: global-out-of-bounds in ata_exec_internal_sg+0x50f/0xc70 Read of size 16 at addr ffffffff91f41f80 by task scsi_eh_1/149 ... The buggy address belongs to the variable: cdb.48319+0x0/0x40
Much like commit 18c9a99bce2a ("libata: zpodd: small read overflow in eject_tray()"), this fixes a cdb[] buffer length, this time in zpodd_get_mech_type():
We read from the cdb[] buffer in ata_exec_internal_sg(). It has to be ATAPI_CDB_LEN (16) bytes long, but this buffer is only 12 bytes.
Reported-by: Jeffrin Jose T jeffrin@rajagiritech.edu.in Fixes: afe759511808c ("libata: identify and init ZPODD devices") Link: https://lore.kernel.org/lkml/201907181423.E808958@keescook/ Tested-by: Jeffrin Jose T jeffrin@rajagiritech.edu.in Reviewed-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ata/libata-zpodd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index 173e6f2dd9af0..eefda51f97d35 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -56,7 +56,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) unsigned int ret; struct rm_feature_desc *desc; struct ata_taskfile tf; - static const char cdb[] = { GPCMD_GET_CONFIGURATION, + static const char cdb[ATAPI_CDB_LEN] = { GPCMD_GET_CONFIGURATION, 2, /* only 1 feature descriptor requested */ 0, 3, /* 3, removable medium feature */ 0, 0, 0,/* reserved */
From: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com
[ Upstream commit da1115fdbd6e86c62185cdd2b4bf7add39f2f82b ]
Currently, nvdimm subsystem expects the device numa node for SCM device to be an online node. It also doesn't try to bring the device numa node online. Hence if we use a non-online numa node as device node we hit crashes like below. This is because we try to access uninitialized NODE_DATA in different code paths.
cpu 0x0: Vector: 300 (Data Access) at [c0000000fac53170] pc: c0000000004bbc50: ___slab_alloc+0x120/0xca0 lr: c0000000004bc834: __slab_alloc+0x64/0xc0 sp: c0000000fac53400 msr: 8000000002009033 dar: 73e8 dsisr: 80000 current = 0xc0000000fabb6d80 paca = 0xc000000003870000 irqmask: 0x03 irq_happened: 0x01 pid = 7, comm = kworker/u16:0 Linux version 5.2.0-06234-g76bd729b2644 (kvaneesh@ltc-boston123) (gcc version 7.4.0 (Ubuntu 7.4.0-1ubuntu1~18.04.1)) #135 SMP Thu Jul 11 05:36:30 CDT 2019 enter ? for help [link register ] c0000000004bc834 __slab_alloc+0x64/0xc0 [c0000000fac53400] c0000000fac53480 (unreliable) [c0000000fac53500] c0000000004bc818 __slab_alloc+0x48/0xc0 [c0000000fac53560] c0000000004c30a0 __kmalloc_node_track_caller+0x3c0/0x6b0 [c0000000fac535d0] c000000000cfafe4 devm_kmalloc+0x74/0xc0 [c0000000fac53600] c000000000d69434 nd_region_activate+0x144/0x560 [c0000000fac536d0] c000000000d6b19c nd_region_probe+0x17c/0x370 [c0000000fac537b0] c000000000d6349c nvdimm_bus_probe+0x10c/0x230 [c0000000fac53840] c000000000cf3cc4 really_probe+0x254/0x4e0 [c0000000fac538d0] c000000000cf429c driver_probe_device+0x16c/0x1e0 [c0000000fac53950] c000000000cf0b44 bus_for_each_drv+0x94/0x130 [c0000000fac539b0] c000000000cf392c __device_attach+0xdc/0x200 [c0000000fac53a50] c000000000cf231c bus_probe_device+0x4c/0xf0 [c0000000fac53a90] c000000000ced268 device_add+0x528/0x810 [c0000000fac53b60] c000000000d62a58 nd_async_device_register+0x28/0xa0 [c0000000fac53bd0] c0000000001ccb8c async_run_entry_fn+0xcc/0x1f0 [c0000000fac53c50] c0000000001bcd9c process_one_work+0x46c/0x860 [c0000000fac53d20] c0000000001bd4f4 worker_thread+0x364/0x5f0 [c0000000fac53db0] c0000000001c7260 kthread+0x1b0/0x1c0 [c0000000fac53e20] c00000000000b954 ret_from_kernel_thread+0x5c/0x68
The patch tries to fix this by picking the nearest online node as the SCM node. This does have a problem of us losing the information that SCM node is equidistant from two other online nodes. If applications need to understand these fine-grained details we should express then like x86 does via /sys/devices/system/node/nodeX/accessY/initiators/
With the patch we get
# numactl -H available: 2 nodes (0-1) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 1 size: 130865 MB node 1 free: 129130 MB node distances: node 0 1 0: 10 20 1: 20 10 # cat /sys/bus/nd/devices/region0/numa_node 0 # dmesg | grep papr_scm [ 91.332305] papr_scm ibm,persistent-memory:ibm,pmemory@44104001: Region registered with target node 2 and online node 0
Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Link: https://lore.kernel.org/r/20190729095128.23707-1-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/platforms/pseries/papr_scm.c | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 96c53b23e58f9..30af5019a68fe 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -194,12 +194,32 @@ static const struct attribute_group *papr_scm_dimm_groups[] = { NULL, };
+static inline int papr_scm_node(int node) +{ + int min_dist = INT_MAX, dist; + int nid, min_node; + + if ((node == NUMA_NO_NODE) || node_online(node)) + return node; + + min_node = first_online_node; + for_each_online_node(nid) { + dist = node_distance(node, nid); + if (dist < min_dist) { + min_dist = dist; + min_node = nid; + } + } + return min_node; +} + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; struct nd_mapping_desc mapping; struct nd_region_desc ndr_desc; unsigned long dimm_flags; + int target_nid, online_nid;
p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -238,8 +258,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = dev_to_node(&p->pdev->dev); - ndr_desc.target_node = ndr_desc.numa_node; + target_nid = dev_to_node(&p->pdev->dev); + online_nid = papr_scm_node(target_nid); + ndr_desc.numa_node = online_nid; + ndr_desc.target_node = target_nid; ndr_desc.res = &p->res; ndr_desc.of_node = p->dn; ndr_desc.provider_data = p; @@ -254,6 +276,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.res, p->dn); goto err; } + if (target_nid != online_nid) + dev_info(dev, "Region registered with target node %d and online node %d", + target_nid, online_nid);
return 0;
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit f4cc743a98136df3c3763050a0e8223b52d9a960 ]
If DRM_LVDS_ENCODER=y but CONFIG_DRM_KMS_HELPER=m, build fails:
drivers/gpu/drm/bridge/lvds-encoder.o: In function `lvds_encoder_probe': lvds-encoder.c:(.text+0x155): undefined reference to `devm_drm_panel_bridge_add'
Reported-by: Hulk Robot hulkci@huawei.com Fixes: dbb58bfd9ae6 ("drm/bridge: Fix lvds-encoder since the panel_bridge rework.") Signed-off-by: YueHaibing yuehaibing@huawei.com Reviewed-by: Neil Armstrong narmstrong@baylibre.com Signed-off-by: Neil Armstrong narmstrong@baylibre.com Link: https://patchwork.freedesktop.org/patch/msgid/20190729071216.27488-1-yuehaib... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index ee777469293a4..cc62603b87c59 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -48,6 +48,7 @@ config DRM_DUMB_VGA_DAC config DRM_LVDS_ENCODER tristate "Transparent parallel to LVDS encoder support" depends on OF + select DRM_KMS_HELPER select DRM_PANEL_BRIDGE help Support for transparent parallel to LVDS encoders that don't require
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit e1ae72a21e5f0d1846e26e3f5963930664702071 ]
If CONFIG_DRM_TOSHIBA_TC358764=y but CONFIG_DRM_KMS_HELPER=m, building fails:
drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x228): undefined reference to `drm_atomic_helper_connector_reset' drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x240): undefined reference to `drm_helper_probe_single_connector_modes' drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x268): undefined reference to `drm_atomic_helper_connector_duplicate_state' drivers/gpu/drm/bridge/tc358764.o:(.rodata+0x270): undefined reference to `drm_atomic_helper_connector_destroy_state'
Like TC358767, select DRM_KMS_HELPER to fix this, and change to select DRM_PANEL to avoid recursive dependency.
Reported-by: Hulk Robot hulkci@huawei.com Fixes: f38b7cca6d0e ("drm/bridge: tc358764: Add DSI to LVDS bridge driver") Signed-off-by: YueHaibing yuehaibing@huawei.com Reviewed-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Reviewed-by: Neil Armstrong narmstrong@baylibre.com Signed-off-by: Neil Armstrong narmstrong@baylibre.com Link: https://patchwork.freedesktop.org/patch/msgid/20190729090520.25968-1-yuehaib... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/bridge/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index cc62603b87c59..e4e22bbae2a7c 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -117,9 +117,10 @@ config DRM_THINE_THC63LVD1024
config DRM_TOSHIBA_TC358764 tristate "TC358764 DSI/LVDS bridge" - depends on DRM && DRM_PANEL depends on OF select DRM_MIPI_DSI + select DRM_KMS_HELPER + select DRM_PANEL help Toshiba TC358764 DSI/LVDS bridge driver.
From: Filipe Manana fdmanana@suse.com
[ Upstream commit a6d155d2e363f26290ffd50591169cb96c2a609e ]
The fiemap handler locks a file range that can have unflushed delalloc, and after locking the range, it tries to attach to a running transaction. If the running transaction started its commit, that is, it is in state TRANS_STATE_COMMIT_START, and either the filesystem was mounted with the flushoncommit option or the transaction is creating a snapshot for the subvolume that contains the file that fiemap is operating on, we end up deadlocking. This happens because fiemap is blocked on the transaction, waiting for it to complete, and the transaction is waiting for the flushed dealloc to complete, which requires locking the file range that the fiemap task already locked. The following stack traces serve as an example of when this deadlock happens:
(...) [404571.515510] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs] [404571.515956] Call Trace: [404571.516360] ? __schedule+0x3ae/0x7b0 [404571.516730] schedule+0x3a/0xb0 [404571.517104] lock_extent_bits+0x1ec/0x2a0 [btrfs] [404571.517465] ? remove_wait_queue+0x60/0x60 [404571.517832] btrfs_finish_ordered_io+0x292/0x800 [btrfs] [404571.518202] normal_work_helper+0xea/0x530 [btrfs] [404571.518566] process_one_work+0x21e/0x5c0 [404571.518990] worker_thread+0x4f/0x3b0 [404571.519413] ? process_one_work+0x5c0/0x5c0 [404571.519829] kthread+0x103/0x140 [404571.520191] ? kthread_create_worker_on_cpu+0x70/0x70 [404571.520565] ret_from_fork+0x3a/0x50 [404571.520915] kworker/u8:6 D 0 31651 2 0x80004000 [404571.521290] Workqueue: btrfs-flush_delalloc btrfs_flush_delalloc_helper [btrfs] (...) [404571.537000] fsstress D 0 13117 13115 0x00004000 [404571.537263] Call Trace: [404571.537524] ? __schedule+0x3ae/0x7b0 [404571.537788] schedule+0x3a/0xb0 [404571.538066] wait_current_trans+0xc8/0x100 [btrfs] [404571.538349] ? remove_wait_queue+0x60/0x60 [404571.538680] start_transaction+0x33c/0x500 [btrfs] [404571.539076] btrfs_check_shared+0xa3/0x1f0 [btrfs] [404571.539513] ? extent_fiemap+0x2ce/0x650 [btrfs] [404571.539866] extent_fiemap+0x2ce/0x650 [btrfs] [404571.540170] do_vfs_ioctl+0x526/0x6f0 [404571.540436] ksys_ioctl+0x70/0x80 [404571.540734] __x64_sys_ioctl+0x16/0x20 [404571.540997] do_syscall_64+0x60/0x1d0 [404571.541279] entry_SYSCALL_64_after_hwframe+0x49/0xbe (...) [404571.543729] btrfs D 0 14210 14208 0x00004000 [404571.544023] Call Trace: [404571.544275] ? __schedule+0x3ae/0x7b0 [404571.544526] ? wait_for_completion+0x112/0x1a0 [404571.544795] schedule+0x3a/0xb0 [404571.545064] schedule_timeout+0x1ff/0x390 [404571.545351] ? lock_acquire+0xa6/0x190 [404571.545638] ? wait_for_completion+0x49/0x1a0 [404571.545890] ? wait_for_completion+0x112/0x1a0 [404571.546228] wait_for_completion+0x131/0x1a0 [404571.546503] ? wake_up_q+0x70/0x70 [404571.546775] btrfs_wait_ordered_extents+0x27c/0x400 [btrfs] [404571.547159] btrfs_commit_transaction+0x3b0/0xae0 [btrfs] [404571.547449] ? btrfs_mksubvol+0x4a4/0x640 [btrfs] [404571.547703] ? remove_wait_queue+0x60/0x60 [404571.547969] btrfs_mksubvol+0x605/0x640 [btrfs] [404571.548226] ? __sb_start_write+0xd4/0x1c0 [404571.548512] ? mnt_want_write_file+0x24/0x50 [404571.548789] btrfs_ioctl_snap_create_transid+0x169/0x1a0 [btrfs] [404571.549048] btrfs_ioctl_snap_create_v2+0x11d/0x170 [btrfs] [404571.549307] btrfs_ioctl+0x133f/0x3150 [btrfs] [404571.549549] ? mem_cgroup_charge_statistics+0x4c/0xd0 [404571.549792] ? mem_cgroup_commit_charge+0x84/0x4b0 [404571.550064] ? __handle_mm_fault+0xe3e/0x11f0 [404571.550306] ? do_raw_spin_unlock+0x49/0xc0 [404571.550608] ? _raw_spin_unlock+0x24/0x30 [404571.550976] ? __handle_mm_fault+0xedf/0x11f0 [404571.551319] ? do_vfs_ioctl+0xa2/0x6f0 [404571.551659] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [404571.552087] do_vfs_ioctl+0xa2/0x6f0 [404571.552355] ksys_ioctl+0x70/0x80 [404571.552621] __x64_sys_ioctl+0x16/0x20 [404571.552864] do_syscall_64+0x60/0x1d0 [404571.553104] entry_SYSCALL_64_after_hwframe+0x49/0xbe (...)
If we were joining the transaction instead of attaching to it, we would not risk a deadlock because a join only blocks if the transaction is in a state greater then or equals to TRANS_STATE_COMMIT_DOING, and the delalloc flush performed by a transaction is done before it reaches that state, when it is in the state TRANS_STATE_COMMIT_START. However a transaction join is intended for use cases where we do modify the filesystem, and fiemap only needs to peek at delayed references from the current transaction in order to determine if extents are shared, and, besides that, when there is no current transaction or when it blocks to wait for a current committing transaction to complete, it creates a new transaction without reserving any space. Such unnecessary transactions, besides doing unnecessary IO, can cause transaction aborts (-ENOSPC) and unnecessary rotation of the precious backup roots.
So fix this by adding a new transaction join variant, named join_nostart, which behaves like the regular join, but it does not create a transaction when none currently exists or after waiting for a committing transaction to complete.
Fixes: 03628cdbc64db6 ("Btrfs: do not start a transaction during fiemap") Signed-off-by: Filipe Manana fdmanana@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/backref.c | 2 +- fs/btrfs/transaction.c | 22 ++++++++++++++++++---- fs/btrfs/transaction.h | 3 +++ 3 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 982152d3f9200..69f8ab4d91f2b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1488,7 +1488,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) goto out; }
- trans = btrfs_attach_transaction(root); + trans = btrfs_join_transaction_nostart(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3f6811cdf803b..168942c5af89e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | __TRANS_ATTACH | - __TRANS_JOIN), + __TRANS_JOIN | + __TRANS_JOIN_NOSTART), [TRANS_STATE_UNBLOCKED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | - __TRANS_JOIN_NOLOCK), + __TRANS_JOIN_NOLOCK | + __TRANS_JOIN_NOSTART), [TRANS_STATE_COMPLETED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | - __TRANS_JOIN_NOLOCK), + __TRANS_JOIN_NOLOCK | + __TRANS_JOIN_NOSTART), };
void btrfs_put_transaction(struct btrfs_transaction *transaction) @@ -525,7 +528,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, ret = join_transaction(fs_info, type); if (ret == -EBUSY) { wait_current_trans(fs_info); - if (unlikely(type == TRANS_ATTACH)) + if (unlikely(type == TRANS_ATTACH || + type == TRANS_JOIN_NOSTART)) ret = -ENOENT; } } while (ret == -EBUSY); @@ -641,6 +645,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root BTRFS_RESERVE_NO_FLUSH, true); }
+/* + * Similar to regular join but it never starts a transaction when none is + * running or after waiting for the current one to finish. + */ +struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root) +{ + return start_transaction(root, 0, TRANS_JOIN_NOSTART, + BTRFS_RESERVE_NO_FLUSH, true); +} + /* * btrfs_attach_transaction() - catch the running transaction * diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 78c446c222b7d..2f695587f828e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -94,11 +94,13 @@ struct btrfs_transaction { #define __TRANS_JOIN (1U << 11) #define __TRANS_JOIN_NOLOCK (1U << 12) #define __TRANS_DUMMY (1U << 13) +#define __TRANS_JOIN_NOSTART (1U << 14)
#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) #define TRANS_ATTACH (__TRANS_ATTACH) #define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) #define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) +#define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART)
#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH)
@@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( int min_factor); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction_barrier( struct btrfs_root *root);
From: Don Brace don.brace@microsemi.com
[ Upstream commit eeebce1862970653cdf5c01e98bc669edd8f529a ]
Reviewed-by: Bader Ali - Saleh bader.alisaleh@microsemi.com Reviewed-by: Scott Teel scott.teel@microsemi.com Reviewed-by: Scott Benesh scott.benesh@microsemi.com Reviewed-by: Kevin Barnett kevin.barnett@microsemi.com Signed-off-by: Don Brace don.brace@microsemi.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/hpsa.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 8068520cf89ed..152de392f9aaf 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2320,6 +2320,8 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h, case IOACCEL2_SERV_RESPONSE_COMPLETE: switch (c2->error_data.status) { case IOACCEL2_STATUS_SR_TASK_COMP_GOOD: + if (cmd) + cmd->result = 0; break; case IOACCEL2_STATUS_SR_TASK_COMP_CHK_COND: cmd->result |= SAM_STAT_CHECK_CONDITION; @@ -2479,8 +2481,10 @@ static void process_ioaccel2_completion(struct ctlr_info *h,
/* check for good status */ if (likely(c2->error_data.serv_response == 0 && - c2->error_data.status == 0)) + c2->error_data.status == 0)) { + cmd->result = 0; return hpsa_cmd_free_and_done(h, c, cmd); + }
/* * Any RAID offload error results in retry which will use @@ -5638,6 +5642,12 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) } c = cmd_tagged_alloc(h, cmd);
+ /* + * This is necessary because the SML doesn't zero out this field during + * error recovery. + */ + cmd->result = 0; + /* * Call alternate submit routine for I/O accelerated commands. * Retries always go down the normal I/O path.
From: Jia-Ju Bai baijiaju1990@gmail.com
[ Upstream commit e82f04ec6ba91065fd33a6201ffd7cab840e1475 ]
In qla2x00_alloc_fcport(), fcport is assigned to NULL in the error handling code on line 4880: fcport = NULL;
Then fcport is used on lines 4883-4886: INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn); INIT_WORK(&fcport->reg_work, qla_register_fcport_fn); INIT_LIST_HEAD(&fcport->gnl_entry); INIT_LIST_HEAD(&fcport->list);
Thus, possible null-pointer dereferences may occur.
To fix these bugs, qla2x00_alloc_fcport() directly returns NULL in the error handling code.
These bugs are found by a static analysis tool STCheck written by us.
Signed-off-by: Jia-Ju Bai baijiaju1990@gmail.com Acked-by: Himanshu Madhani hmadhani@marvell.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 54772d4c377f9..6a4c719497ca1 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4877,7 +4877,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) ql_log(ql_log_warn, vha, 0xd049, "Failed to allocate ct_sns request.\n"); kfree(fcport); - fcport = NULL; + return NULL; }
INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn);
From: Christian Brauner christian@brauner.io
[ Upstream commit 30b692d3b390c6fe78a5064be0c4bbd44a41be59 ]
Since commit b191d6491be6 ("pidfd: fix a poll race when setting exit_state") we unconditionally set exit_state to EXIT_ZOMBIE before calling into do_notify_parent(). This was done to eliminate a race when querying exit_state in do_notify_pidfd(). Back then we decided to do the absolute minimal thing to fix this and not touch the rest of the exit_notify() function where exit_state is set. Since this fix has not caused any issues change the setting of exit_state to EXIT_DEAD in the autoreap case to account for the fact hat exit_state is set to EXIT_ZOMBIE unconditionally. This fix was planned but also explicitly requested in [1] and makes the whole code more consistent.
/* References */ [1]: https://lore.kernel.org/lkml/CAHk-=wigcxGFR2szue4wavJtH5cYTTeNES=toUBVGsmX0r...
Signed-off-by: Christian Brauner christian@brauner.io Acked-by: Oleg Nesterov oleg@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/exit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c index a75b6a7f458a7..0922e84ba6c1f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -733,9 +733,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) autoreap = true; }
- tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; - if (tsk->exit_state == EXIT_DEAD) + if (autoreap) { + tsk->exit_state = EXIT_DEAD; list_add(&tsk->ptrace_entry, &dead); + }
/* mt-exec, de_thread() is waiting for group leader */ if (unlikely(tsk->signal->notify_count < 0))
From: Masahiro Yamada yamada.masahiro@socionext.com
[ Upstream commit b1d45c23284e55a379f85554a27a548b7988d47a ]
These include guards are broken.
Match the #if !define() and #define lines so that they work correctly.
Link: http://lkml.kernel.org/r/20190720103943.16982-1-yamada.masahiro@socionext.co...
Fixes: f54d1867005c3 ("dma-buf: Rename struct fence to dma_fence") Fixes: 2e26ca7150a4f ("tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header") Fixes: e543002f77f46 ("qdisc: add tracepoint qdisc:qdisc_dequeue for dequeued SKBs") Fixes: 95f295f9fe081 ("dmaengine: tegra: add tracepoints to driver") Signed-off-by: Masahiro Yamada yamada.masahiro@socionext.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/trace/events/dma_fence.h | 2 +- include/trace/events/napi.h | 4 ++-- include/trace/events/qdisc.h | 4 ++-- include/trace/events/tegra_apb_dma.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h index 2212adda8f77f..64e92d56c6a8f 100644 --- a/include/trace/events/dma_fence.h +++ b/include/trace/events/dma_fence.h @@ -2,7 +2,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM dma_fence
-#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_TRACE_DMA_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_DMA_FENCE_H
#include <linux/tracepoint.h> diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index f3a12566bed05..6678cf8b235b8 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -3,7 +3,7 @@ #define TRACE_SYSTEM napi
#if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_NAPI_H_ +#define _TRACE_NAPI_H
#include <linux/netdevice.h> #include <linux/tracepoint.h> @@ -38,7 +38,7 @@ TRACE_EVENT(napi_poll,
#undef NO_DEV
-#endif /* _TRACE_NAPI_H_ */ +#endif /* _TRACE_NAPI_H */
/* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 60d0d8bd336d0..0d1a9ebf55ba4 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -2,7 +2,7 @@ #define TRACE_SYSTEM qdisc
#if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_QDISC_H_ +#define _TRACE_QDISC_H
#include <linux/skbuff.h> #include <linux/netdevice.h> @@ -44,7 +44,7 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state, __entry->packets, __entry->skbaddr ) );
-#endif /* _TRACE_QDISC_H_ */ +#endif /* _TRACE_QDISC_H */
/* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/include/trace/events/tegra_apb_dma.h b/include/trace/events/tegra_apb_dma.h index 0818f62861109..971cd02d2dafe 100644 --- a/include/trace/events/tegra_apb_dma.h +++ b/include/trace/events/tegra_apb_dma.h @@ -1,5 +1,5 @@ #if !defined(_TRACE_TEGRA_APB_DMA_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_TEGRA_APM_DMA_H +#define _TRACE_TEGRA_APB_DMA_H
#include <linux/tracepoint.h> #include <linux/dmaengine.h> @@ -55,7 +55,7 @@ TRACE_EVENT(tegra_dma_isr, TP_printk("%s: irq %d\n", __get_str(chan), __entry->irq) );
-#endif /* _TRACE_TEGRADMA_H */ +#endif /* _TRACE_TEGRA_APB_DMA_H */
/* This part must be outside protection */ #include <trace/define_trace.h>
From: Kent Russell kent.russell@amd.com
[ Upstream commit d65848657c3da5c0d4b685f823d0230f151ab34e ]
This was missed during the addition of VegaM support
Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Kent Russell kent.russell@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 4b192e0ce92f4..ed7977d0dd018 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1148,7 +1148,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( adev->asic_type != CHIP_FIJI && adev->asic_type != CHIP_POLARIS10 && adev->asic_type != CHIP_POLARIS11 && - adev->asic_type != CHIP_POLARIS12) ? + adev->asic_type != CHIP_POLARIS12 && + adev->asic_type != CHIP_VEGAM) ? VI_BO_SIZE_ALIGN : 1;
mapping_flags = AMDGPU_VM_PAGE_READABLE;
From: Evan Quan evan.quan@amd.com
[ Upstream commit 479156f2e5540077377a823eaf5a4263bd329063 ]
DPM state relates are not supported on the new SW SMU ASICs. But still it's not OK to trigger null pointer dereference on accessing them.
Signed-off-by: Evan Quan evan.quan@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 18 +++++++++++++----- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 3 ++- 2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index abeaab4bf1bc2..d55519bc34e52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -144,12 +144,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm;
- if (is_support_sw_smu(adev) && adev->smu.ppt_funcs->get_current_power_state) - pm = amdgpu_smu_get_current_power_state(adev); - else if (adev->powerplay.pp_funcs->get_current_power_state) + if (is_support_sw_smu(adev)) { + if (adev->smu.ppt_funcs->get_current_power_state) + pm = amdgpu_smu_get_current_power_state(adev); + else + pm = adev->pm.dpm.user_state; + } else if (adev->powerplay.pp_funcs->get_current_power_state) { pm = amdgpu_dpm_get_current_power_state(adev); - else + } else { pm = adev->pm.dpm.user_state; + }
return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : @@ -176,7 +180,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, goto fail; }
- if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm.user_state = state; + mutex_unlock(&adev->pm.mutex); + } else if (adev->powerplay.pp_funcs->dispatch_tasks) { amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state); } else { mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index eec329ab60370..61a6d183c153f 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -63,7 +63,8 @@ int smu_get_power_num_states(struct smu_context *smu,
/* not support power state */ memset(state_info, 0, sizeof(struct pp_states_info)); - state_info->nums = 0; + state_info->nums = 1; + state_info->states[0] = POWER_STATE_TYPE_DEFAULT;
return 0; }
From: Christian König christian.koenig@amd.com
[ Upstream commit 67d0859e2758ef992fd32499747ce4b1038a63c0 ]
We always need to drop the ctx reference and should check for errors first and then dereference the fence pointer.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Chunming Zhou david1.zhou@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2f6239b6be6fe..fe028561dc0e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1093,29 +1093,27 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, return r; }
- fence = amdgpu_ctx_get_fence(ctx, entity, - deps[i].handle); + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue;
if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { - struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + struct drm_sched_fence *s_fence; struct dma_fence *old = fence;
+ s_fence = to_drm_sched_fence(fence); fence = dma_fence_get(&s_fence->scheduled); dma_fence_put(old); }
- if (IS_ERR(fence)) { - r = PTR_ERR(fence); - amdgpu_ctx_put(ctx); + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + dma_fence_put(fence); + if (r) return r; - } else if (fence) { - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, - true); - dma_fence_put(fence); - amdgpu_ctx_put(ctx); - if (r) - return r; - } } return 0; }
From: Wang Xiayang xywang.sjtu@sjtu.edu.cn
[ Upstream commit 929e571c04c285861e0bb049a396a2bdaea63282 ]
Coccinelle reports a path that the array "data" is never initialized. The path skips the checks in the conditional branches when either of callback functions, read_wave_vgprs and read_wave_sgprs, is not registered. Later, the uninitialized "data" array is read in the while-loop below and passed to put_user().
Fix the path by allocating the array with kcalloc().
The patch is simplier than adding a fall-back branch that explicitly calls memset(data, 0, ...). Also it does not need the multiplication 1024*sizeof(*data) as the size parameter for memset() though there is no risk of integer overflow.
Signed-off-by: Wang Xiayang xywang.sjtu@sjtu.edu.cn Reviewed-by: Chunming Zhou david1.zhou@amd.com Reviewed-by: Christian König christian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8930d66f22040..91bfb24f963e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -703,7 +703,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, thread = (*pos & GENMASK_ULL(59, 52)) >> 52; bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
- data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); + data = kcalloc(1024, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM;
From: Miquel Raynal miquel.raynal@bootlin.com
[ Upstream commit 090bb803708198e5ab6b0046398c7ed9f4d12d6b ]
Retrieving PHYs can defer the probe, do not spawn an error when -EPROBE_DEFER is returned, it is normal behavior.
Fixes: b1a9edbda040 ("ata: libahci: allow to use multiple PHYs") Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Miquel Raynal miquel.raynal@bootlin.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ata/libahci_platform.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 72312ad2e142d..c25cdbf817f18 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -338,6 +338,9 @@ static int ahci_platform_get_phy(struct ahci_host_priv *hpriv, u32 port, hpriv->phys[port] = NULL; rc = 0; break; + case -EPROBE_DEFER: + /* Do not complain yet */ + break;
default: dev_err(dev,
From: Masahiro Yamada yamada.masahiro@socionext.com
[ Upstream commit cb4819934a7f9b87876f11ed05b8624c0114551b ]
KBUILD_EXTRA_SYMBOLS makes sense only when building external modules. Moreover, the modpost sets 'external_module' if the -e option is given.
I replaced $(patsubst %, -e %,...) with simpler $(addprefix -e,...) while I was here.
Signed-off-by: Masahiro Yamada yamada.masahiro@socionext.com Signed-off-by: Sasha Levin sashal@kernel.org --- scripts/Makefile.modpost | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index fec6ec2ffa47d..b7e71545733b8 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -75,7 +75,7 @@ modpost = scripts/mod/modpost \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \ $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ - $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ + $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_MODPOST_WARN),-w)
From: Stephen Boyd swboyd@chromium.org
[ Upstream commit e8de12fb7cde2c85bc31097cd098da79a4818305 ]
If the particular version of clang a user has doesn't enable -Werror=unknown-warning-option by default, even though it is the default[1], then make sure to pass the option to the Kconfig cc-option command so that testing options from Kconfig files works properly. Otherwise, depending on the default values setup in the clang toolchain we will silently assume options such as -Wmaybe-uninitialized are supported by clang, when they really aren't.
A compilation issue only started happening for me once commit 589834b3a009 ("kbuild: Add -Werror=unknown-warning-option to CLANG_FLAGS") was applied on top of commit b303c6df80c9 ("kbuild: compute false-positive -Wmaybe-uninitialized cases in Kconfig"). This leads kbuild to try and test for the existence of the -Wmaybe-uninitialized flag with the cc-option command in scripts/Kconfig.include, and it doesn't see an error returned from the option test so it sets the config value to Y. Then the Makefile tries to pass the unknown option on the command line and -Werror=unknown-warning-option catches the invalid option and breaks the build. Before commit 589834b3a009 ("kbuild: Add -Werror=unknown-warning-option to CLANG_FLAGS") the build works fine, but any cc-option test of a warning option in Kconfig files silently evaluates to true, even if the warning option flag isn't supported on clang.
Note: This doesn't change cc-option usages in Makefiles because those use a different rule that includes KBUILD_CFLAGS by default (see the __cc-option command in scripts/Kbuild.incluide). The KBUILD_CFLAGS variable already has the -Werror=unknown-warning-option flag set. Thanks to Doug for pointing out the different rule.
[1] https://clang.llvm.org/docs/DiagnosticsReference.html#wunknown-warning-optio... Cc: Peter Smith peter.smith@linaro.org Cc: Nick Desaulniers ndesaulniers@google.com Cc: Douglas Anderson dianders@chromium.org Signed-off-by: Stephen Boyd swboyd@chromium.org Reviewed-by: Nathan Chancellor natechancellor@gmail.com Signed-off-by: Masahiro Yamada yamada.masahiro@socionext.com Signed-off-by: Sasha Levin sashal@kernel.org --- scripts/Kconfig.include | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index 8a5c4d645eb14..4bbf4fc163a29 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -25,7 +25,7 @@ failure = $(if-success,$(1),n,y)
# $(cc-option,<flag>) # Return y if the compiler supports <flag>, n otherwise -cc-option = $(success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null) +cc-option = $(success,$(CC) -Werror $(CLANG_FLAGS) $(1) -E -x c /dev/null -o /dev/null)
# $(ld-option,<flag>) # Return y if the linker supports <flag>, n otherwise
From: Qian Cai cai@lca.pw
[ Upstream commit f1d4836201543e88ebe70237e67938168d5fab19 ]
GCC throws out this warning on arm64.
drivers/firmware/efi/libstub/arm-stub.c: In function 'efi_entry': drivers/firmware/efi/libstub/arm-stub.c:132:22: warning: variable 'si' set but not used [-Wunused-but-set-variable]
Fix it by making free_screen_info() a static inline function.
Acked-by: Will Deacon will@kernel.org Signed-off-by: Qian Cai cai@lca.pw Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/efi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index c9e9a6978e73e..d3cb42fd51ec2 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -105,7 +105,11 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
#define alloc_screen_info(x...) &screen_info -#define free_screen_info(x...) + +static inline void free_screen_info(efi_system_table_t *sys_table_arg, + struct screen_info *si) +{ +}
/* redeclare as 'hidden' so the compiler will generate relative references */ extern struct screen_info screen_info __attribute__((__visibility__("hidden")));
From: Rob Clark robdclark@chromium.org
[ Upstream commit 7e9e5ead55beacc11116b3fb90b0de6e7cf55a69 ]
drm_cflush_pages() is no-op on arm/arm64. But instead we can use dma_sync API.
Fixes failures w/ vgem_test.
Acked-by: Daniel Vetter daniel.vetter@ffwll.ch Signed-off-by: Rob Clark robdclark@chromium.org Signed-off-by: Sean Paul seanpaul@chromium.org Link: https://patchwork.freedesktop.org/patch/msgid/20190717211542.30482-1-robdcla... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/vgem/vgem_drv.c | 130 ++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 11a8f99ba18c5..fc04803ff4035 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -47,10 +47,16 @@ static struct vgem_device { struct platform_device *platform; } *vgem_device;
+static void sync_and_unpin(struct drm_vgem_gem_object *bo); +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo); + static void vgem_gem_free_object(struct drm_gem_object *obj) { struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
+ if (!obj->import_attach) + sync_and_unpin(vgem_obj); + kvfree(vgem_obj->pages); mutex_destroy(&vgem_obj->pages_lock);
@@ -78,40 +84,15 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS;
mutex_lock(&obj->pages_lock); + if (!obj->pages) + pin_and_sync(obj); if (obj->pages) { get_page(obj->pages[page_offset]); vmf->page = obj->pages[page_offset]; ret = 0; } mutex_unlock(&obj->pages_lock); - if (ret) { - struct page *page; - - page = shmem_read_mapping_page( - file_inode(obj->base.filp)->i_mapping, - page_offset); - if (!IS_ERR(page)) { - vmf->page = page; - ret = 0; - } else switch (PTR_ERR(page)) { - case -ENOSPC: - case -ENOMEM: - ret = VM_FAULT_OOM; - break; - case -EBUSY: - ret = VM_FAULT_RETRY; - break; - case -EFAULT: - case -EINVAL: - ret = VM_FAULT_SIGBUS; - break; - default: - WARN_ON(PTR_ERR(page)); - ret = VM_FAULT_SIGBUS; - break; - }
- } return ret; }
@@ -277,32 +258,93 @@ static const struct file_operations vgem_driver_fops = { .release = drm_release, };
-static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo) +/* Called under pages_lock, except in free path (where it can't race): */ +static void sync_and_unpin(struct drm_vgem_gem_object *bo) { - mutex_lock(&bo->pages_lock); - if (bo->pages_pin_count++ == 0) { - struct page **pages; + struct drm_device *dev = bo->base.dev; + + if (bo->table) { + dma_sync_sg_for_cpu(dev->dev, bo->table->sgl, + bo->table->nents, DMA_BIDIRECTIONAL); + sg_free_table(bo->table); + kfree(bo->table); + bo->table = NULL; + } + + if (bo->pages) { + drm_gem_put_pages(&bo->base, bo->pages, true, true); + bo->pages = NULL; + } +} + +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo) +{ + struct drm_device *dev = bo->base.dev; + int npages = bo->base.size >> PAGE_SHIFT; + struct page **pages; + struct sg_table *sgt; + + WARN_ON(!mutex_is_locked(&bo->pages_lock)); + + pages = drm_gem_get_pages(&bo->base); + if (IS_ERR(pages)) { + bo->pages_pin_count--; + mutex_unlock(&bo->pages_lock); + return pages; + }
- pages = drm_gem_get_pages(&bo->base); - if (IS_ERR(pages)) { - bo->pages_pin_count--; - mutex_unlock(&bo->pages_lock); - return pages; - } + sgt = drm_prime_pages_to_sg(pages, npages); + if (IS_ERR(sgt)) { + dev_err(dev->dev, + "failed to allocate sgt: %ld\n", + PTR_ERR(bo->table)); + drm_gem_put_pages(&bo->base, pages, false, false); + mutex_unlock(&bo->pages_lock); + return ERR_CAST(bo->table); + } + + /* + * Flush the object from the CPU cache so that importers + * can rely on coherent indirect access via the exported + * dma-address. + */ + dma_sync_sg_for_device(dev->dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); + + bo->pages = pages; + bo->table = sgt; + + return pages; +} + +static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo) +{ + struct page **pages;
- bo->pages = pages; + mutex_lock(&bo->pages_lock); + if (bo->pages_pin_count++ == 0 && !bo->pages) { + pages = pin_and_sync(bo); + } else { + WARN_ON(!bo->pages); + pages = bo->pages; } mutex_unlock(&bo->pages_lock);
- return bo->pages; + return pages; }
static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) { + /* + * We shouldn't hit this for imported bo's.. in the import + * case we don't own the scatter-table + */ + WARN_ON(bo->base.import_attach); + mutex_lock(&bo->pages_lock); if (--bo->pages_pin_count == 0) { - drm_gem_put_pages(&bo->base, bo->pages, true, true); - bo->pages = NULL; + WARN_ON(!bo->table); + sync_and_unpin(bo); } mutex_unlock(&bo->pages_lock); } @@ -310,18 +352,12 @@ static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) static int vgem_prime_pin(struct drm_gem_object *obj) { struct drm_vgem_gem_object *bo = to_vgem_bo(obj); - long n_pages = obj->size >> PAGE_SHIFT; struct page **pages;
pages = vgem_pin_pages(bo); if (IS_ERR(pages)) return PTR_ERR(pages);
- /* Flush the object from the CPU cache so that importers can rely - * on coherent indirect access via the exported dma-address. - */ - drm_clflush_pages(pages, n_pages); - return 0; }
please don't queue this one for stable branches.. it was causing problems in intel CI
BR, -R
On Tue, Aug 6, 2019 at 2:34 PM Sasha Levin sashal@kernel.org wrote:
From: Rob Clark robdclark@chromium.org
[ Upstream commit 7e9e5ead55beacc11116b3fb90b0de6e7cf55a69 ]
drm_cflush_pages() is no-op on arm/arm64. But instead we can use dma_sync API.
Fixes failures w/ vgem_test.
Acked-by: Daniel Vetter daniel.vetter@ffwll.ch Signed-off-by: Rob Clark robdclark@chromium.org Signed-off-by: Sean Paul seanpaul@chromium.org Link: https://patchwork.freedesktop.org/patch/msgid/20190717211542.30482-1-robdcla... Signed-off-by: Sasha Levin sashal@kernel.org
drivers/gpu/drm/vgem/vgem_drv.c | 130 ++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 11a8f99ba18c5..fc04803ff4035 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -47,10 +47,16 @@ static struct vgem_device { struct platform_device *platform; } *vgem_device;
+static void sync_and_unpin(struct drm_vgem_gem_object *bo); +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo);
static void vgem_gem_free_object(struct drm_gem_object *obj) { struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
if (!obj->import_attach)
sync_and_unpin(vgem_obj);
kvfree(vgem_obj->pages); mutex_destroy(&vgem_obj->pages_lock);
@@ -78,40 +84,15 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS;
mutex_lock(&obj->pages_lock);
if (!obj->pages)
pin_and_sync(obj); if (obj->pages) { get_page(obj->pages[page_offset]); vmf->page = obj->pages[page_offset]; ret = 0; } mutex_unlock(&obj->pages_lock);
if (ret) {
struct page *page;
page = shmem_read_mapping_page(
file_inode(obj->base.filp)->i_mapping,
page_offset);
if (!IS_ERR(page)) {
vmf->page = page;
ret = 0;
} else switch (PTR_ERR(page)) {
case -ENOSPC:
case -ENOMEM:
ret = VM_FAULT_OOM;
break;
case -EBUSY:
ret = VM_FAULT_RETRY;
break;
case -EFAULT:
case -EINVAL:
ret = VM_FAULT_SIGBUS;
break;
default:
WARN_ON(PTR_ERR(page));
ret = VM_FAULT_SIGBUS;
break;
}
} return ret;
}
@@ -277,32 +258,93 @@ static const struct file_operations vgem_driver_fops = { .release = drm_release, };
-static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo) +/* Called under pages_lock, except in free path (where it can't race): */ +static void sync_and_unpin(struct drm_vgem_gem_object *bo) {
mutex_lock(&bo->pages_lock);
if (bo->pages_pin_count++ == 0) {
struct page **pages;
struct drm_device *dev = bo->base.dev;
if (bo->table) {
dma_sync_sg_for_cpu(dev->dev, bo->table->sgl,
bo->table->nents, DMA_BIDIRECTIONAL);
sg_free_table(bo->table);
kfree(bo->table);
bo->table = NULL;
}
if (bo->pages) {
drm_gem_put_pages(&bo->base, bo->pages, true, true);
bo->pages = NULL;
}
+}
+static struct page **pin_and_sync(struct drm_vgem_gem_object *bo) +{
struct drm_device *dev = bo->base.dev;
int npages = bo->base.size >> PAGE_SHIFT;
struct page **pages;
struct sg_table *sgt;
WARN_ON(!mutex_is_locked(&bo->pages_lock));
pages = drm_gem_get_pages(&bo->base);
if (IS_ERR(pages)) {
bo->pages_pin_count--;
mutex_unlock(&bo->pages_lock);
return pages;
}
pages = drm_gem_get_pages(&bo->base);
if (IS_ERR(pages)) {
bo->pages_pin_count--;
mutex_unlock(&bo->pages_lock);
return pages;
}
sgt = drm_prime_pages_to_sg(pages, npages);
if (IS_ERR(sgt)) {
dev_err(dev->dev,
"failed to allocate sgt: %ld\n",
PTR_ERR(bo->table));
drm_gem_put_pages(&bo->base, pages, false, false);
mutex_unlock(&bo->pages_lock);
return ERR_CAST(bo->table);
}
/*
* Flush the object from the CPU cache so that importers
* can rely on coherent indirect access via the exported
* dma-address.
*/
dma_sync_sg_for_device(dev->dev, sgt->sgl,
sgt->nents, DMA_BIDIRECTIONAL);
bo->pages = pages;
bo->table = sgt;
return pages;
+}
+static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo) +{
struct page **pages;
bo->pages = pages;
mutex_lock(&bo->pages_lock);
if (bo->pages_pin_count++ == 0 && !bo->pages) {
pages = pin_and_sync(bo);
} else {
WARN_ON(!bo->pages);
pages = bo->pages; } mutex_unlock(&bo->pages_lock);
return bo->pages;
return pages;
}
static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) {
/*
* We shouldn't hit this for imported bo's.. in the import
* case we don't own the scatter-table
*/
WARN_ON(bo->base.import_attach);
mutex_lock(&bo->pages_lock); if (--bo->pages_pin_count == 0) {
drm_gem_put_pages(&bo->base, bo->pages, true, true);
bo->pages = NULL;
WARN_ON(!bo->table);
sync_and_unpin(bo); } mutex_unlock(&bo->pages_lock);
} @@ -310,18 +352,12 @@ static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) static int vgem_prime_pin(struct drm_gem_object *obj) { struct drm_vgem_gem_object *bo = to_vgem_bo(obj);
long n_pages = obj->size >> PAGE_SHIFT; struct page **pages; pages = vgem_pin_pages(bo); if (IS_ERR(pages)) return PTR_ERR(pages);
/* Flush the object from the CPU cache so that importers can rely
* on coherent indirect access via the exported dma-address.
*/
drm_clflush_pages(pages, n_pages);
return 0;
}
-- 2.20.1
On Tue, Aug 06, 2019 at 03:45:48PM -0700, Rob Clark wrote:
please don't queue this one for stable branches.. it was causing problems in intel CI
Now dropped, thank you.
-- Thanks, Sasha
From: Mao Han han_mao@c-sky.com
[ Upstream commit b399abe7c21e248dc6224cadc9a378a2beb10cfd ]
This patch fix following perf record error by linking vdso.so with build id.
perf.data perf.data.old [ perf record: Woken up 1 times to write data ] free(): double free detected in tcache 2 Aborted
perf record use filename__read_build_id(util/symbol-minimal.c) to get build id when libelf is not supported. When vdso.so is linked without build id, the section size of PT_NOTE will be zero, buf size will realloc to zero and cause memory corruption.
Signed-off-by: Mao Han han_mao@c-sky.com Cc: Paul Walmsley paul.walmsley@sifive.com Cc: Palmer Dabbelt palmer@sifive.com Cc: Albert Ou aou@eecs.berkeley.edu Signed-off-by: Paul Walmsley paul.walmsley@sifive.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index f1d6ffe43e428..49a5852fd07dd 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -37,7 +37,7 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE # these symbols in the kernel code rather than hand-coded addresses.
SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--hash-style=both + -Wl,--build-id -Wl,--hash-style=both $(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE $(call if_changed,vdsold)
From: Rayagonda Kokatanur rayagonda.kokatanur@broadcom.com
[ Upstream commit fd01eecdf9591453177d7b06faaabef8c300114a ]
Use SMBUS_MASTER_DATA_READ.MASTER_RD_STATUS bit to check for RX FIFO empty condition because SMBUS_MASTER_FIFO_CONTROL.MASTER_RX_PKT_COUNT is not updated for read >= 64 bytes. This fixes the issue when trying to read from the I2C slave more than 63 bytes.
Fixes: c24b8d574b7c ("i2c: iproc: Extend I2C read up to 255 bytes") Cc: stable@kernel.org Signed-off-by: Rayagonda Kokatanur rayagonda.kokatanur@broadcom.com Reviewed-by: Ray Jui ray.jui@broadcom.com Signed-off-by: Wolfram Sang wsa@the-dreams.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-bcm-iproc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index a845b8decac8f..ad1681872e39d 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -403,16 +403,18 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, static void bcm_iproc_i2c_read_valid_bytes(struct bcm_iproc_i2c_dev *iproc_i2c) { struct i2c_msg *msg = iproc_i2c->msg; + uint32_t val;
/* Read valid data from RX FIFO */ while (iproc_i2c->rx_bytes < msg->len) { - if (!((iproc_i2c_rd_reg(iproc_i2c, M_FIFO_CTRL_OFFSET) >> M_FIFO_RX_CNT_SHIFT) - & M_FIFO_RX_CNT_MASK)) + val = iproc_i2c_rd_reg(iproc_i2c, M_RX_OFFSET); + + /* rx fifo empty */ + if (!((val >> M_RX_STATUS_SHIFT) & M_RX_STATUS_MASK)) break;
msg->buf[iproc_i2c->rx_bytes] = - (iproc_i2c_rd_reg(iproc_i2c, M_RX_OFFSET) >> - M_RX_DATA_SHIFT) & M_RX_DATA_MASK; + (val >> M_RX_DATA_SHIFT) & M_RX_DATA_MASK; iproc_i2c->rx_bytes++; } }
From: Julien Thierry julien.thierry.kdev@gmail.com
[ Upstream commit 677379bc9139ac24b310a281fcb21a2f04288353 ]
On a system with two security states, if SCR_EL3.FIQ is cleared, non-secure IRQ priorities get shifted to fit the secure view but priority masks aren't.
On such system, it turns out that GIC_PRIO_IRQON masks the priority of normal interrupts, which obviously ends up in a hang.
Increase GIC_PRIO_IRQON value (i.e. lower priority) to make sure interrupts are not blocked by it.
Cc: Oleg Nesterov oleg@redhat.com Fixes: bd82d4bd21880b7c ("arm64: Fix incorrect irqflag restore for priority masking") Acked-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Julien Thierry julien.thierry.kdev@gmail.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com [will: fixed Fixes: tag] Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/arch_gicv3.h | 6 ++++++ arch/arm64/include/asm/ptrace.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 79155a8cfe7c0..89e4c8b793490 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -155,6 +155,12 @@ static inline void gic_pmr_mask_irqs(void) BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET)); BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); + /* + * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared + * and non-secure PMR accesses are not subject to the shifts that + * are applied to IRQ priorities + */ + BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); gic_write_pmr(GIC_PRIO_IRQOFF); }
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 81693244f58d6..701eaa7381876 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -30,7 +30,7 @@ * in the the priority mask, it indicates that PSR.I should be set and * interrupt disabling temporarily does not rely on IRQ priorities. */ -#define GIC_PRIO_IRQON 0xc0 +#define GIC_PRIO_IRQON 0xe0 #define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) #define GIC_PRIO_PSR_I_SET (1 << 4)
From: Masami Hiramatsu mhiramat@kernel.org
[ Upstream commit ee07b93e7721ccd5d5b9fa6f0c10cb3fe2f1f4f9 ]
Prohibit probing on return_address() and subroutines which is called from return_address(), since the it is invoked from trace_hardirqs_off() which is also kprobe blacklisted.
Reported-by: Naresh Kamboju naresh.kamboju@linaro.org Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/kernel/return_address.c | 3 +++ arch/arm64/kernel/stacktrace.c | 3 +++ 2 files changed, 6 insertions(+)
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index b21cba90f82dd..491184a9f0812 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -8,6 +8,7 @@
#include <linux/export.h> #include <linux/ftrace.h> +#include <linux/kprobes.h>
#include <asm/stack_pointer.h> #include <asm/stacktrace.h> @@ -29,6 +30,7 @@ static int save_return_addr(struct stackframe *frame, void *d) return 0; } } +NOKPROBE_SYMBOL(save_return_addr);
void *return_address(unsigned int level) { @@ -52,3 +54,4 @@ void *return_address(unsigned int level) return NULL; } EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 62d395151abe6..cd7dab54d17b3 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <linux/export.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> @@ -73,6 +74,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
return 0; } +NOKPROBE_SYMBOL(unwind_frame);
void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) @@ -87,6 +89,7 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, break; } } +NOKPROBE_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE struct stack_trace_data {
From: Qian Cai cai@lca.pw
[ Upstream commit 7d4e2dcf311d3b98421d1f119efe5964cafa32fc ]
GCC throws a warning,
arch/arm64/mm/mmu.c: In function 'pud_free_pmd_page': arch/arm64/mm/mmu.c:1033:8: warning: variable 'pud' set but not used [-Wunused-but-set-variable] pud_t pud; ^~~
because pud_table() is a macro and compiled away. Fix it by making it a static inline function and for pud_sect() as well.
Signed-off-by: Qian Cai cai@lca.pw Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index fca26759081a7..b9574d850f14f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -419,8 +419,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_SECT)
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 -#define pud_sect(pud) (0) -#define pud_table(pud) (1) +static inline bool pud_sect(pud_t pud) { return false; } +static inline bool pud_table(pud_t pud) { return true; } #else #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT)
From: Qian Cai cai@lca.pw
[ Upstream commit 7732d20a160c76006c7fe7bca5178aea6af1d2e8 ]
When CONFIG_KASAN_SW_TAGS=n, set_tag() is compiled away. GCC throws a warning,
mm/kasan/common.c: In function '__kasan_kmalloc': mm/kasan/common.c:464:5: warning: variable 'tag' set but not used [-Wunused-but-set-variable] u8 tag = 0xff; ^~~
Fix it by making __tag_set() a static inline function the same as arch_kasan_set_tag() in mm/kasan/kasan.h for consistency because there is a macro in arch/arm64/include/asm/kasan.h,
#define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag)
However, when CONFIG_DEBUG_VIRTUAL=n and CONFIG_SPARSEMEM_VMEMMAP=y, page_to_virt() will call __tag_set() with incorrect type of a parameter, so fix that as well. Also, still let page_to_virt() return "void *" instead of "const void *", so will not need to add a similar cast in lowmem_page_address().
Signed-off-by: Qian Cai cai@lca.pw Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/memory.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b7ba75809751e..fb04f10a78ab3 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -210,7 +210,11 @@ extern u64 vabits_user; #define __tag_reset(addr) untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else -#define __tag_set(addr, tag) (addr) +static inline const void *__tag_set(const void *addr, u8 tag) +{ + return addr; +} + #define __tag_reset(addr) (addr) #define __tag_get(addr) 0 #endif @@ -301,8 +305,8 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ({ \ unsigned long __addr = \ ((__page_to_voff(page)) | PAGE_OFFSET); \ - unsigned long __addr_tag = \ - __tag_set(__addr, page_kasan_tag(page)); \ + const void *__addr_tag = \ + __tag_set((void *)__addr, page_kasan_tag(page)); \ ((void *)__addr_tag); \ })
From: "Luck, Tony" tony.luck@intel.com
[ Upstream commit 61f259821dd3306e49b7d42a3f90fb5a4ff3351b ]
Some processors may mispredict an array bounds check and speculatively access memory that they should not. With a user supplied array index we like to play things safe by masking the value with the array size before it is used as an index.
Signed-off-by: Tony Luck tony.luck@intel.com Link: https://lore.kernel.org/r/20190731043957.GA1600@agluck-desk2.amr.corp.intel.... Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/core/user_mad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 671f07ba1fad6..025b6d86a61fc 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -49,6 +49,7 @@ #include <linux/sched.h> #include <linux/semaphore.h> #include <linux/slab.h> +#include <linux/nospec.h>
#include <linux/uaccess.h>
@@ -883,11 +884,14 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
if (get_user(id, arg)) return -EFAULT; + if (id >= IB_UMAD_MAX_AGENTS) + return -EINVAL;
mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex);
- if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + id = array_index_nospec(id, IB_UMAD_MAX_AGENTS); + if (!__get_agent(file, id)) { ret = -EINVAL; goto out; }
From: Guy Levi guyle@mellanox.com
[ Upstream commit e5366d309a772fef264ec85e858f9ea46f939848 ]
Driver shouldn't allow to use UMR to register a MR when umr_modify_atomic_disabled is set. Otherwise it will always end up with a failure in the post send flow which sets the UMR WQE to modify atomic access right.
Fixes: c8d75a980fab ("IB/mlx5: Respect new UMR capabilities") Signed-off-by: Guy Levi guyle@mellanox.com Reviewed-by: Moni Shoua monis@mellanox.com Signed-off-by: Leon Romanovsky leonro@mellanox.com Link: https://lore.kernel.org/r/20190731081929.32559-1-leon@kernel.org Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/mlx5/mr.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 5f09699fab984..e5fd847d284f0 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -51,22 +51,12 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); -static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev) -{ - return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled); -}
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); }
-static bool use_umr(struct mlx5_ib_dev *dev, int order) -{ - return order <= mr_cache_max_order(dev) && - umr_can_modify_entity_size(dev); -} - static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); @@ -1268,7 +1258,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; - bool populate_mtts = false; + bool use_umr; struct ib_umem *umem; int page_shift; int npages; @@ -1300,29 +1290,30 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err);
- if (use_umr(dev, order)) { + use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) && + (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) || + !MLX5_CAP_GEN(dev->mdev, atomic)); + + if (order <= mr_cache_max_order(dev) && use_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d\n", order); mr = NULL; } - populate_mtts = false; } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { if (access_flags & IB_ACCESS_ON_DEMAND) { err = -EINVAL; pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); goto error; } - populate_mtts = true; + use_umr = false; }
if (!mr) { - if (!umr_can_modify_entity_size(dev)) - populate_mtts = true; mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, populate_mtts); + page_shift, access_flags, !use_umr); mutex_unlock(&dev->slow_path_mutex); }
@@ -1338,7 +1329,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
update_odp_mr(mr);
- if (!populate_mtts) { + if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
if (access_flags & IB_ACCESS_ON_DEMAND)
From: Gal Pressman galpress@amazon.com
[ Upstream commit 52e0a118a20308dd6aa531e20a5ab5907d2264c8 ]
The check for QP type different than XRC has excluded driver QP types from the resource tracker. As a result, "rdma resource show" user command would not show opened driver QPs which does not reflect the real state of the system.
Check QP type explicitly instead of assuming enum values/ordering.
Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Signed-off-by: Gal Pressman galpress@amazon.com Reviewed-by: Leon Romanovsky leonro@mellanox.com Link: https://lore.kernel.org/r/20190801104354.11417-1-galpress@amazon.com Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/core/core_priv.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index ff40a450b5d28..ff9e0d7fb4f31 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -292,7 +292,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, struct ib_udata *udata, struct ib_uobject *uobj) { + enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; + bool is_xrc;
if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); @@ -310,7 +312,8 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, * and more importantly they are created internaly by driver, * see mlx5 create_dev_resources() as an example. */ - if (attr->qp_type < IB_QPT_XRC_INI) { + is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; + if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { qp->res.type = RDMA_RESTRACK_QP; if (uobj) rdma_restrack_uadd(&qp->res);
From: Jack Morgenstein jackm@dev.mellanox.co.il
[ Upstream commit 770b7d96cfff6a8bf6c9f261ba6f135dc9edf484 ]
We encountered a use-after-free bug when unloading the driver:
[ 3562.116059] BUG: KASAN: use-after-free in ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.117233] Read of size 4 at addr ffff8882ca5aa868 by task kworker/u13:2/23862 [ 3562.118385] [ 3562.119519] CPU: 2 PID: 23862 Comm: kworker/u13:2 Tainted: G OE 5.1.0-for-upstream-dbg-2019-05-19_16-44-30-13 #1 [ 3562.121806] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [ 3562.123075] Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] [ 3562.124383] Call Trace: [ 3562.125640] dump_stack+0x9a/0xeb [ 3562.126911] print_address_description+0xe3/0x2e0 [ 3562.128223] ? ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.129545] __kasan_report+0x15c/0x1df [ 3562.130866] ? ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.132174] kasan_report+0xe/0x20 [ 3562.133514] ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.134835] ? find_mad_agent+0xa00/0xa00 [ib_core] [ 3562.136158] ? qlist_free_all+0x51/0xb0 [ 3562.137498] ? mlx4_ib_sqp_comp_worker+0x1970/0x1970 [mlx4_ib] [ 3562.138833] ? quarantine_reduce+0x1fa/0x270 [ 3562.140171] ? kasan_unpoison_shadow+0x30/0x40 [ 3562.141522] ib_mad_recv_done+0xdf6/0x3000 [ib_core] [ 3562.142880] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 3562.144277] ? ib_mad_send_done+0x1810/0x1810 [ib_core] [ 3562.145649] ? mlx4_ib_destroy_cq+0x2a0/0x2a0 [mlx4_ib] [ 3562.147008] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 3562.148380] ? debug_object_deactivate+0x2b9/0x4a0 [ 3562.149814] __ib_process_cq+0xe2/0x1d0 [ib_core] [ 3562.151195] ib_cq_poll_work+0x45/0xf0 [ib_core] [ 3562.152577] process_one_work+0x90c/0x1860 [ 3562.153959] ? pwq_dec_nr_in_flight+0x320/0x320 [ 3562.155320] worker_thread+0x87/0xbb0 [ 3562.156687] ? __kthread_parkme+0xb6/0x180 [ 3562.158058] ? process_one_work+0x1860/0x1860 [ 3562.159429] kthread+0x320/0x3e0 [ 3562.161391] ? kthread_park+0x120/0x120 [ 3562.162744] ret_from_fork+0x24/0x30 ... [ 3562.187615] Freed by task 31682: [ 3562.188602] save_stack+0x19/0x80 [ 3562.189586] __kasan_slab_free+0x11d/0x160 [ 3562.190571] kfree+0xf5/0x2f0 [ 3562.191552] ib_mad_port_close+0x200/0x380 [ib_core] [ 3562.192538] ib_mad_remove_device+0xf0/0x230 [ib_core] [ 3562.193538] remove_client_context+0xa6/0xe0 [ib_core] [ 3562.194514] disable_device+0x14e/0x260 [ib_core] [ 3562.195488] __ib_unregister_device+0x79/0x150 [ib_core] [ 3562.196462] ib_unregister_device+0x21/0x30 [ib_core] [ 3562.197439] mlx4_ib_remove+0x162/0x690 [mlx4_ib] [ 3562.198408] mlx4_remove_device+0x204/0x2c0 [mlx4_core] [ 3562.199381] mlx4_unregister_interface+0x49/0x1d0 [mlx4_core] [ 3562.200356] mlx4_ib_cleanup+0xc/0x1d [mlx4_ib] [ 3562.201329] __x64_sys_delete_module+0x2d2/0x400 [ 3562.202288] do_syscall_64+0x95/0x470 [ 3562.203277] entry_SYSCALL_64_after_hwframe+0x49/0xbe
The problem was that the MAD PD was deallocated before the MAD CQ. There was completion work pending for the CQ when the PD got deallocated. When the mad completion handling reached procedure ib_mad_post_receive_mads(), we got a use-after-free bug in the following line of code in that procedure: sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; (the pd pointer in the above line is no longer valid, because the pd has been deallocated).
We fix this by allocating the PD before the CQ in procedure ib_mad_port_open(), and deallocating the PD after freeing the CQ in procedure ib_mad_port_close().
Since the CQ completion work queue is flushed during ib_free_cq(), no completions will be pending for that CQ when the PD is later deallocated.
Note that freeing the CQ before deallocating the PD is the practice in the ULPs.
Fixes: 4be90bc60df4 ("IB/mad: Remove ib_get_dma_mr calls") Signed-off-by: Jack Morgenstein jackm@dev.mellanox.co.il Signed-off-by: Leon Romanovsky leonro@mellanox.com Link: https://lore.kernel.org/r/20190801121449.24973-1-leon@kernel.org Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/core/mad.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index cc99479b2c09d..9947d16edef21 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3224,18 +3224,18 @@ static int ib_mad_port_open(struct ib_device *device, if (has_smi) cq_size *= 2;
+ port_priv->pd = ib_alloc_pd(device, 0); + if (IS_ERR(port_priv->pd)) { + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); + ret = PTR_ERR(port_priv->pd); + goto error3; + } + port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, IB_POLL_UNBOUND_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); - goto error3; - } - - port_priv->pd = ib_alloc_pd(device, 0); - if (IS_ERR(port_priv->pd)) { - dev_err(&device->dev, "Couldn't create ib_mad PD\n"); - ret = PTR_ERR(port_priv->pd); goto error4; }
@@ -3278,11 +3278,11 @@ static int ib_mad_port_open(struct ib_device *device, error7: destroy_mad_qp(&port_priv->qp_info[0]); error6: - ib_dealloc_pd(port_priv->pd); -error4: ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); +error4: + ib_dealloc_pd(port_priv->pd); error3: kfree(port_priv);
@@ -3312,8 +3312,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); - ib_dealloc_pd(port_priv->pd); ib_free_cq(port_priv->cq); + ib_dealloc_pd(port_priv->pd); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */
From: Leon Romanovsky leonro@mellanox.com
[ Upstream commit 23eaf3b5c1a755e3193480c76fb29414be648688 ]
The below kernel panic was observed when created bond mode LACP with GRE tunnel on top. The reason to it was not released spinlock during mlx5 notify unregsiter sequence.
[ 234.562007] BUG: scheduling while atomic: sh/10900/0x00000002 [ 234.563005] Preemption disabled at: [ 234.566864] ------------[ cut here ]------------ [ 234.567120] DEBUG_LOCKS_WARN_ON(val > preempt_count()) [ 234.567139] WARNING: CPU: 16 PID: 10900 at kernel/sched/core.c:3203 preempt_count_sub+0xca/0x170 [ 234.569550] CPU: 16 PID: 10900 Comm: sh Tainted: G W 5.2.0-rc1-for-linust-dbg-2019-05-25_04-57-33-60 #1 [ 234.569886] Hardware name: Dell Inc. PowerEdge R720/0X3D66, BIOS 2.6.1 02/12/2018 [ 234.570183] RIP: 0010:preempt_count_sub+0xca/0x170 [ 234.570404] Code: 03 38 d0 7c 08 84 d2 0f 85 b0 00 00 00 8b 15 dd 02 03 04 85 d2 75 ba 48 c7 c6 00 e1 88 83 48 c7 c7 40 e1 88 83 e8 76 11 f7 ff <0f> 0b 5b c3 65 8b 05 d3 1f d8 7e 84 c0 75 82 e8 62 c3 c3 00 85 c0 [ 234.570911] RSP: 0018:ffff888b94477b08 EFLAGS: 00010286 [ 234.571133] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 [ 234.571391] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000246 [ 234.571648] RBP: ffff888ba5560000 R08: fffffbfff08962d5 R09: fffffbfff08962d5 [ 234.571902] R10: 0000000000000001 R11: fffffbfff08962d4 R12: ffff888bac6e9548 [ 234.572157] R13: ffff888babfaf728 R14: ffff888bac6e9568 R15: ffff888babfaf750 [ 234.572412] FS: 00007fcafa59b740(0000) GS:ffff888bed200000(0000) knlGS:0000000000000000 [ 234.572686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 234.572914] CR2: 00007f984f16b140 CR3: 0000000b2bf0a001 CR4: 00000000001606e0 [ 234.573172] Call Trace: [ 234.573336] _raw_spin_unlock+0x2e/0x50 [ 234.573542] mlx5_ib_unbind_slave_port+0x1bc/0x690 [mlx5_ib] [ 234.573793] mlx5_ib_cleanup_multiport_master+0x1d3/0x660 [mlx5_ib] [ 234.574039] mlx5_ib_stage_init_cleanup+0x4c/0x360 [mlx5_ib] [ 234.574271] ? kfree+0xf5/0x2f0 [ 234.574465] __mlx5_ib_remove+0x61/0xd0 [mlx5_ib] [ 234.574688] ? __mlx5_ib_remove+0xd0/0xd0 [mlx5_ib] [ 234.574951] mlx5_remove_device+0x234/0x300 [mlx5_core] [ 234.575224] mlx5_unregister_device+0x4d/0x1e0 [mlx5_core] [ 234.575493] remove_one+0x4f/0x160 [mlx5_core] [ 234.575704] pci_device_remove+0xef/0x2a0 [ 234.581407] ? pcibios_free_irq+0x10/0x10 [ 234.587143] ? up_read+0xc1/0x260 [ 234.592785] device_release_driver_internal+0x1ab/0x430 [ 234.598442] unbind_store+0x152/0x200 [ 234.604064] ? sysfs_kf_write+0x3b/0x180 [ 234.609441] ? sysfs_file_ops+0x160/0x160 [ 234.615021] kernfs_fop_write+0x277/0x440 [ 234.620288] ? __sb_start_write+0x1ef/0x2c0 [ 234.625512] vfs_write+0x15e/0x460 [ 234.630786] ksys_write+0x156/0x1e0 [ 234.635988] ? __ia32_sys_read+0xb0/0xb0 [ 234.641120] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 234.646163] do_syscall_64+0x95/0x470 [ 234.651106] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 234.656004] RIP: 0033:0x7fcaf9c9cfd0 [ 234.660686] Code: 73 01 c3 48 8b 0d c0 6e 2d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d cd cf 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ee cb 01 00 48 89 04 24 [ 234.670128] RSP: 002b:00007ffd3b01ddd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 234.674811] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007fcaf9c9cfd0 [ 234.679387] RDX: 000000000000000d RSI: 00007fcafa5c1000 RDI: 0000000000000001 [ 234.683848] RBP: 00007fcafa5c1000 R08: 000000000000000a R09: 00007fcafa59b740 [ 234.688167] R10: 00007ffd3b01d8e0 R11: 0000000000000246 R12: 00007fcaf9f75400 [ 234.692386] R13: 000000000000000d R14: 0000000000000001 R15: 0000000000000000 [ 234.696495] irq event stamp: 153067 [ 234.700525] hardirqs last enabled at (153067): [<ffffffff83258c39>] _raw_spin_unlock_irqrestore+0x59/0x70 [ 234.704665] hardirqs last disabled at (153066): [<ffffffff83259382>] _raw_spin_lock_irqsave+0x22/0x90 [ 234.708722] softirqs last enabled at (153058): [<ffffffff836006c5>] __do_softirq+0x6c5/0xb4e [ 234.712673] softirqs last disabled at (153051): [<ffffffff81227c1d>] irq_exit+0x17d/0x1d0 [ 234.716601] ---[ end trace 5dbf096843ee9ce6 ]---
Fixes: df097a278c75 ("IB/mlx5: Use the new mlx5 core notifier API") Signed-off-by: Leon Romanovsky leonro@mellanox.com Link: https://lore.kernel.org/r/20190731083852.584-1-leon@kernel.org Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/mlx5/main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a6713a3b6c803..9ab276a8bc81a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5687,13 +5687,12 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, return; }
- if (mpi->mdev_events.notifier_call) - mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); - mpi->mdev_events.notifier_call = NULL; - mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock); + if (mpi->mdev_events.notifier_call) + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); + mpi->mdev_events.notifier_call = NULL; mlx5_remove_netdev_notifier(ibdev, port_num); spin_lock(&port->mp.mpi_lock);
From: Jeffrey Hugo jeffrey.l.hugo@gmail.com
[ Upstream commit 9ca7ad6c7706edeae331c1632d0c63897418ebad ]
add_gpu_components() adds found GPU nodes from the DT to the match list, regardless of the status of the nodes. This is a problem, because if the nodes are disabled, they should not be on the match list because they will not be matched. This prevents display from initing if a GPU node is defined, but it's status is disabled.
Fix this by checking the node's status before adding it to the match list.
Fixes: dc3ea265b856 (drm/msm: Drop the gpu binding) Reviewed-by: Rob Clark robdclark@gmail.com Signed-off-by: Jeffrey Hugo jeffrey.l.hugo@gmail.com Signed-off-by: Sean Paul seanpaul@chromium.org Link: https://patchwork.freedesktop.org/patch/msgid/20190626180015.45242-1-jeffrey... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/msm/msm_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 4a0fe8a25ad77..a56eef3cfee78 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1267,7 +1267,8 @@ static int add_gpu_components(struct device *dev, if (!np) return 0;
- drm_of_component_match_add(dev, matchptr, compare_of, np); + if (of_device_is_available(np)) + drm_of_component_match_add(dev, matchptr, compare_of, np);
of_node_put(np);
From: Wei Yongjun weiyongjun1@huawei.com
[ Upstream commit 020fb3bebc224dfe9353a56ecbe2d5fac499dffc ]
Fix to return error code -ENOMEM from the rdma_zalloc_drv_obj() error handling case instead of 0, as done elsewhere in this function.
Fixes: e8ac9389f0d7 ("RDMA: Fix allocation failure on pointer pd") Fixes: 21a428a019c9 ("RDMA: Handle PD allocations by IB/core") Signed-off-by: Wei Yongjun weiyongjun1@huawei.com Reviewed-by: Leon Romanovsky leonro@mellanox.com Link: https://lore.kernel.org/r/20190801012725.150493-1-weiyongjun1@huawei.com Signed-off-by: Doug Ledford dledford@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index e068a02122f5e..9496c69fff3a2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -745,8 +745,10 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
ibdev = &hr_dev->ib_dev; pd = rdma_zalloc_drv_obj(ibdev, ib_pd); - if (!pd) + if (!pd) { + ret = -ENOMEM; goto alloc_mem_failed; + }
pd->device = ibdev; ret = hns_roce_alloc_pd(pd, NULL);
From: Colin Ian King colin.king@canonical.com
[ Upstream commit 1bbbab097a05276e312dd2462791d32b21ceb1ee ]
Currently the retry counter is not being decremented, leading to a potential infinite spin if the scalar_reads don't change state.
Addresses-Coverity: ("Infinite loop") Fixes: 280e54c9f614 ("drm/exynos: scaler: Reset hardware before starting the operation") Signed-off-by: Colin Ian King colin.king@canonical.com Signed-off-by: Inki Dae inki.dae@samsung.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/exynos/exynos_drm_scaler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index ec9c1b7d31033..8989f8af716b7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -94,12 +94,12 @@ static inline int scaler_reset(struct scaler_context *scaler) scaler_write(SCALER_CFG_SOFT_RESET, SCALER_CFG); do { cpu_relax(); - } while (retry > 1 && + } while (--retry > 1 && scaler_read(SCALER_CFG) & SCALER_CFG_SOFT_RESET); do { cpu_relax(); scaler_write(1, SCALER_INT_EN); - } while (retry > 0 && scaler_read(SCALER_INT_EN) != 1); + } while (--retry > 0 && scaler_read(SCALER_INT_EN) != 1);
return retry ? 0 : -EIO; }
From: Sasha Levin
Sent: 06 August 2019 22:33
From: Colin Ian King colin.king@canonical.com
[ Upstream commit 1bbbab097a05276e312dd2462791d32b21ceb1ee ]
Currently the retry counter is not being decremented, leading to a potential infinite spin if the scalar_reads don't change state.
Addresses-Coverity: ("Infinite loop") Fixes: 280e54c9f614 ("drm/exynos: scaler: Reset hardware before starting the operation") Signed-off-by: Colin Ian King colin.king@canonical.com Signed-off-by: Inki Dae inki.dae@samsung.com Signed-off-by: Sasha Levin sashal@kernel.org
drivers/gpu/drm/exynos/exynos_drm_scaler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index ec9c1b7d31033..8989f8af716b7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -94,12 +94,12 @@ static inline int scaler_reset(struct scaler_context *scaler) scaler_write(SCALER_CFG_SOFT_RESET, SCALER_CFG); do { cpu_relax();
- } while (retry > 1 &&
- } while (--retry > 1 && scaler_read(SCALER_CFG) & SCALER_CFG_SOFT_RESET); do { cpu_relax(); scaler_write(1, SCALER_INT_EN);
- } while (retry > 0 && scaler_read(SCALER_INT_EN) != 1);
} while (--retry > 0 && scaler_read(SCALER_INT_EN) != 1);
return retry ? 0 : -EIO;
If the first loop hits the retry limit the second loop won't be right and the final return value will be 0.
David
- Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Wed, Aug 07, 2019 at 08:49:52AM +0000, David Laight wrote:
From: Sasha Levin
Sent: 06 August 2019 22:33
From: Colin Ian King colin.king@canonical.com
[ Upstream commit 1bbbab097a05276e312dd2462791d32b21ceb1ee ]
Currently the retry counter is not being decremented, leading to a potential infinite spin if the scalar_reads don't change state.
Addresses-Coverity: ("Infinite loop") Fixes: 280e54c9f614 ("drm/exynos: scaler: Reset hardware before starting the operation") Signed-off-by: Colin Ian King colin.king@canonical.com Signed-off-by: Inki Dae inki.dae@samsung.com Signed-off-by: Sasha Levin sashal@kernel.org
drivers/gpu/drm/exynos/exynos_drm_scaler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index ec9c1b7d31033..8989f8af716b7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -94,12 +94,12 @@ static inline int scaler_reset(struct scaler_context *scaler) scaler_write(SCALER_CFG_SOFT_RESET, SCALER_CFG); do { cpu_relax();
- } while (retry > 1 &&
- } while (--retry > 1 && scaler_read(SCALER_CFG) & SCALER_CFG_SOFT_RESET); do { cpu_relax(); scaler_write(1, SCALER_INT_EN);
- } while (retry > 0 && scaler_read(SCALER_INT_EN) != 1);
} while (--retry > 0 && scaler_read(SCALER_INT_EN) != 1);
return retry ? 0 : -EIO;
If the first loop hits the retry limit the second loop won't be right and the final return value will be 0.
This looks like an upstream problem as well, no?
-- Thanks, Sasha
From: Masami Hiramatsu mhiramat@kernel.org
[ Upstream commit b3980e48528c4d2a9e70b145a5bba328b73a0f93 ]
kprobes manipulates the interrupted PSTATE for single step, and doesn't restore it. Thus, if we put a kprobe where the pstate.D (debug) masked, the mask will be cleared after the kprobe hits.
Moreover, in the most complicated case, this can lead a kernel crash with below message when a nested kprobe hits.
[ 152.118921] Unexpected kernel single-step exception at EL1
When the 1st kprobe hits, do_debug_exception() will be called. At this point, debug exception (= pstate.D) must be masked (=1). But if another kprobes hits before single-step of the first kprobe (e.g. inside user pre_handler), it unmask the debug exception (pstate.D = 0) and return. Then, when the 1st kprobe setting up single-step, it saves current DAIF, mask DAIF, enable single-step, and restore DAIF. However, since "D" flag in DAIF is cleared by the 2nd kprobe, the single-step exception happens soon after restoring DAIF.
This has been introduced by commit 7419333fa15e ("arm64: kprobe: Always clear pstate.D in breakpoint exception handler")
To solve this issue, this stores all DAIF bits and restore it after single stepping.
Reported-by: Naresh Kamboju naresh.kamboju@linaro.org Fixes: 7419333fa15e ("arm64: kprobe: Always clear pstate.D in breakpoint exception handler") Reviewed-by: James Morse james.morse@arm.com Tested-by: James Morse james.morse@arm.com Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/daifflags.h | 2 ++ arch/arm64/kernel/probes/kprobes.c | 40 +++++------------------------- 2 files changed, 8 insertions(+), 34 deletions(-)
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index ae7e605085d71..9c0e0178ea291 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -13,6 +13,8 @@ #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT #define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) +
/* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 88ce502c8e6f1..624f2501f3f87 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -21,6 +21,7 @@ #include <asm/ptrace.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> +#include <asm/daifflags.h> #include <asm/system_misc.h> #include <asm/insn.h> #include <linux/uaccess.h> @@ -165,33 +166,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p) __this_cpu_write(current_kprobe, p); }
-/* - * When PSTATE.D is set (masked), then software step exceptions can not be - * generated. - * SPSR's D bit shows the value of PSTATE.D immediately before the - * exception was taken. PSTATE.D is set while entering into any exception - * mode, however software clears it for any normal (none-debug-exception) - * mode in the exception entry. Therefore, when we are entering into kprobe - * breakpoint handler from any normal mode then SPSR.D bit is already - * cleared, however it is set when we are entering from any debug exception - * mode. - * Since we always need to generate single step exception after a kprobe - * breakpoint exception therefore we need to clear it unconditionally, when - * we become sure that the current breakpoint exception is for kprobe. - */ -static void __kprobes -spsr_set_debug_flag(struct pt_regs *regs, int mask) -{ - unsigned long spsr = regs->pstate; - - if (mask) - spsr |= PSR_D_BIT; - else - spsr &= ~PSR_D_BIT; - - regs->pstate = spsr; -} - /* * Interrupts need to be disabled before single-step mode is set, and not * reenabled until after single-step mode ends. @@ -203,17 +177,17 @@ spsr_set_debug_flag(struct pt_regs *regs, int mask) static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { - kcb->saved_irqflag = regs->pstate; + kcb->saved_irqflag = regs->pstate & DAIF_MASK; regs->pstate |= PSR_I_BIT; + /* Unmask PSTATE.D for enabling software step exceptions. */ + regs->pstate &= ~PSR_D_BIT; }
static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { - if (kcb->saved_irqflag & PSR_I_BIT) - regs->pstate |= PSR_I_BIT; - else - regs->pstate &= ~PSR_I_BIT; + regs->pstate &= ~DAIF_MASK; + regs->pstate |= kcb->saved_irqflag; }
static void __kprobes @@ -250,8 +224,6 @@ static void __kprobes setup_singlestep(struct kprobe *p,
set_ss_context(kcb, slot); /* mark pending ss */
- spsr_set_debug_flag(regs, 0); - /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); kernel_enable_single_step(regs);
From: Masami Hiramatsu mhiramat@kernel.org
[ Upstream commit d8bb6718c4db9bcd075dde7ff55d46091ccfae15 ]
Make debug exceptions visible from RCU so that synchronize_rcu() correctly track the debug exception handler.
This also introduces sanity checks for user-mode exceptions as same as x86's ist_enter()/ist_exit().
The debug exception can interrupt in idle task. For example, it warns if we put a kprobe on a function called from idle task as below. The warning message showed that the rcu_read_lock() caused this problem. But actually, this means the RCU is lost the context which is already in NMI/IRQ.
/sys/kernel/debug/tracing # echo p default_idle_call >> kprobe_events /sys/kernel/debug/tracing # echo 1 > events/kprobes/enable /sys/kernel/debug/tracing # [ 135.122237] [ 135.125035] ============================= [ 135.125310] WARNING: suspicious RCU usage [ 135.125581] 5.2.0-08445-g9187c508bdc7 #20 Not tainted [ 135.125904] ----------------------------- [ 135.126205] include/linux/rcupdate.h:594 rcu_read_lock() used illegally while idle! [ 135.126839] [ 135.126839] other info that might help us debug this: [ 135.126839] [ 135.127410] [ 135.127410] RCU used illegally from idle CPU! [ 135.127410] rcu_scheduler_active = 2, debug_locks = 1 [ 135.128114] RCU used illegally from extended quiescent state! [ 135.128555] 1 lock held by swapper/0/0: [ 135.128944] #0: (____ptrval____) (rcu_read_lock){....}, at: call_break_hook+0x0/0x178 [ 135.130499] [ 135.130499] stack backtrace: [ 135.131192] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0-08445-g9187c508bdc7 #20 [ 135.131841] Hardware name: linux,dummy-virt (DT) [ 135.132224] Call trace: [ 135.132491] dump_backtrace+0x0/0x140 [ 135.132806] show_stack+0x24/0x30 [ 135.133133] dump_stack+0xc4/0x10c [ 135.133726] lockdep_rcu_suspicious+0xf8/0x108 [ 135.134171] call_break_hook+0x170/0x178 [ 135.134486] brk_handler+0x28/0x68 [ 135.134792] do_debug_exception+0x90/0x150 [ 135.135051] el1_dbg+0x18/0x8c [ 135.135260] default_idle_call+0x0/0x44 [ 135.135516] cpu_startup_entry+0x2c/0x30 [ 135.135815] rest_init+0x1b0/0x280 [ 135.136044] arch_call_rest_init+0x14/0x1c [ 135.136305] start_kernel+0x4d4/0x500 [ 135.136597]
So make debug exception visible to RCU can fix this warning.
Reported-by: Naresh Kamboju naresh.kamboju@linaro.org Acked-by: Paul E. McKenney paulmck@linux.ibm.com Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/mm/fault.c | 57 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2d115016feb42..414b8e0f19e0e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -800,6 +800,53 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; }
+/* + * In debug exception context, we explicitly disable preemption despite + * having interrupts disabled. + * This serves two purposes: it makes it much less likely that we would + * accidentally schedule in exception context and it will force a warning + * if we somehow manage to schedule by accident. + */ +static void debug_exception_enter(struct pt_regs *regs) +{ + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); + + if (user_mode(regs)) { + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + } else { + /* + * We might have interrupted pretty much anything. In + * fact, if we're a debug exception, we can even interrupt + * NMI processing. We don't want this code makes in_nmi() + * to return true, but we need to notify RCU. + */ + rcu_nmi_enter(); + } + + preempt_disable(); + + /* This code is a bit fragile. Test it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work"); +} +NOKPROBE_SYMBOL(debug_exception_enter); + +static void debug_exception_exit(struct pt_regs *regs) +{ + preempt_enable_no_resched(); + + if (!user_mode(regs)) + rcu_nmi_exit(); + + if (interrupts_enabled(regs)) + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(debug_exception_exit); + #ifdef CONFIG_ARM64_ERRATUM_1463225 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
@@ -840,12 +887,7 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, if (cortex_a76_erratum_1463225_debug_handler(regs)) return;
- /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); + debug_exception_enter(regs);
if (user_mode(regs) && !is_ttbr0_addr(pc)) arm64_apply_bp_hardening(); @@ -855,7 +897,6 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, inf->sig, inf->code, (void __user *)pc, esr); }
- if (interrupts_enabled(regs)) - trace_hardirqs_on(); + debug_exception_exit(regs); } NOKPROBE_SYMBOL(do_debug_exception);
From: Yang Shi yang.shi@linux.alibaba.com
[ Upstream commit df9576def004d2cd5beedc00cb6e8901427634b9 ]
When running ltp's oom test with kmemleak enabled, the below warning was triggerred since kernel detects __GFP_NOFAIL & ~__GFP_DIRECT_RECLAIM is passed in:
WARNING: CPU: 105 PID: 2138 at mm/page_alloc.c:4608 __alloc_pages_nodemask+0x1c31/0x1d50 Modules linked in: loop dax_pmem dax_pmem_core ip_tables x_tables xfs virtio_net net_failover virtio_blk failover ata_generic virtio_pci virtio_ring virtio libata CPU: 105 PID: 2138 Comm: oom01 Not tainted 5.2.0-next-20190710+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:__alloc_pages_nodemask+0x1c31/0x1d50 ... kmemleak_alloc+0x4e/0xb0 kmem_cache_alloc+0x2a7/0x3e0 mempool_alloc_slab+0x2d/0x40 mempool_alloc+0x118/0x2b0 bio_alloc_bioset+0x19d/0x350 get_swap_bio+0x80/0x230 __swap_writepage+0x5ff/0xb20
The mempool_alloc_slab() clears __GFP_DIRECT_RECLAIM, however kmemleak has __GFP_NOFAIL set all the time due to d9570ee3bd1d4f2 ("kmemleak: allow to coexist with fault injection"). But, it doesn't make any sense to have __GFP_NOFAIL and ~__GFP_DIRECT_RECLAIM specified at the same time.
According to the discussion on the mailing list, the commit should be reverted for short term solution. Catalin Marinas would follow up with a better solution for longer term.
The failure rate of kmemleak metadata allocation may increase in some circumstances, but this should be expected side effect.
Link: http://lkml.kernel.org/r/1563299431-111710-1-git-send-email-yang.shi@linux.a... Fixes: d9570ee3bd1d4f2 ("kmemleak: allow to coexist with fault injection") Signed-off-by: Yang Shi yang.shi@linux.alibaba.com Suggested-by: Catalin Marinas catalin.marinas@arm.com Acked-by: Michal Hocko mhocko@suse.com Cc: Dmitry Vyukov dvyukov@google.com Cc: David Rientjes rientjes@google.com Cc: Matthew Wilcox willy@infradead.org Cc: Qian Cai cai@lca.pw Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 3e147ea831826..3afb01bce736a 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -114,7 +114,7 @@ /* GFP bitmask for kmemleak internal allocations */ #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \ __GFP_NORETRY | __GFP_NOMEMALLOC | \ - __GFP_NOWARN | __GFP_NOFAIL) + __GFP_NOWARN)
/* scanning area inside a memory block */ struct kmemleak_scan_area {
From: YueHaibing yuehaibing@huawei.com
[ Upstream commit 7bc36e3ce91471b6377c8eadc0a2f220a2280083 ]
Fixes gcc '-Wunused-but-set-variable' warning:
fs/ocfs2/xattr.c: In function ocfs2_xattr_bucket_find: fs/ocfs2/xattr.c:3828:6: warning: variable last_hash set but not used [-Wunused-but-set-variable]
It's never used and can be removed.
Link: http://lkml.kernel.org/r/20190716132110.34836-1-yuehaibing@huawei.com Signed-off-by: YueHaibing yuehaibing@huawei.com Acked-by: Joseph Qi joseph.qi@linux.alibaba.com Cc: Mark Fasheh mark@fasheh.com Cc: Joel Becker jlbec@evilplan.org Cc: Junxiao Bi junxiao.bi@oracle.com Cc: Changwei Ge gechangwei@live.cn Cc: Gang He ghe@suse.com Cc: Jun Piao piaojun@huawei.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ocfs2/xattr.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 385f3aaa24480..90c830e3758e2 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3825,7 +3825,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int low_bucket = 0, bucket, high_bucket; struct ocfs2_xattr_bucket *search; - u32 last_hash; u64 blkno, lower_blkno = 0;
search = ocfs2_xattr_bucket_new(inode); @@ -3869,8 +3868,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, if (xh->xh_count) xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
- last_hash = le32_to_cpu(xe->xe_name_hash); - /* record lower_blkno which may be the insert place. */ lower_blkno = blkno;
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit ee38d94a0ad89890b770f6c876263cf9fcbfde84 ]
ARM64 randdconfig builds regularly run into a build error, especially when NUMA_BALANCING and SPARSEMEM are enabled but not SPARSEMEM_VMEMMAP:
#error "KASAN: not enough bits in page flags for tag"
The last-cpuid bits are already contitional on the available space, so the result of the calculation is a bit random on whether they were already left out or not.
Adding the kasan tag bits before last-cpuid makes it much more likely to end up with a successful build here, and should be reliable for randconfig at least, as long as that does not randomize NR_CPUS or NODES_SHIFT but uses the defaults.
In order for the modified check to not trigger in the x86 vdso32 code where all constants are wrong (building with -m32), enclose all the definitions with an #ifdef.
[arnd@arndb.de: build fix] Link: http://lkml.kernel.org/r/CAK8P3a3Mno1SWTcuAOT0Wa9VS15pdU6EfnkxLbDpyS55yO04+g... Link: http://lkml.kernel.org/r/20190722115520.3743282-1-arnd@arndb.de Link: https://lore.kernel.org/lkml/20190618095347.3850490-1-arnd@arndb.de/ Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Arnd Bergmann arnd@arndb.de Reviewed-by: Andrey Konovalov andreyknvl@google.com Reviewed-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Andrey Konovalov andreyknvl@google.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Will Deacon will.deacon@arm.com Cc: Christoph Lameter cl@linux.com Cc: Mark Rutland mark.rutland@arm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/vdso/vdso.h | 1 + include/linux/page-flags-layout.h | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/mips/vdso/vdso.h b/arch/mips/vdso/vdso.h index 14b1931be69c3..b65b169778e31 100644 --- a/arch/mips/vdso/vdso.h +++ b/arch/mips/vdso/vdso.h @@ -9,6 +9,7 @@ #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT)
/* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */ +#define BUILD_VDSO32_64 #undef CONFIG_64BIT #define CONFIG_32BIT 1 #ifndef __ASSEMBLY__ diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 1dda31825ec4a..71283739ffd23 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -32,6 +32,7 @@
#endif /* CONFIG_SPARSEMEM */
+#ifndef BUILD_VDSO32_64 /* * page->flags layout: * @@ -76,20 +77,22 @@ #define LAST_CPUPID_SHIFT 0 #endif
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_TAG_WIDTH 8 +#else +#define KASAN_TAG_WIDTH 0 +#endif + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif
-#ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_TAG_WIDTH 8 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ > BITS_PER_LONG - NR_PAGEFLAGS -#error "KASAN: not enough bits in page flags for tag" -#endif -#else -#define KASAN_TAG_WIDTH 0 +#error "Not enough bits in page flags" #endif
/* @@ -104,4 +107,5 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif
+#endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
From: Paul Wise pabs3@bonedaddy.net
[ Upstream commit 315c69261dd3fa12dbc830d4fa00d1fad98d3b03 ]
Save the offsets of the start of each argument to avoid having to update pointers to each argument after every corename krealloc and to avoid having to duplicate the memory for the dump command.
Executable names containing spaces were previously being expanded from %e or %E and then split in the middle of the filename. This is incorrect behaviour since an argument list can represent arguments with spaces.
The splitting could lead to extra arguments being passed to the core dump handler that it might have interpreted as options or ignored completely.
Core dump handlers that are not aware of this Linux kernel issue will be using %e or %E without considering that it may be split and so they will be vulnerable to processes with spaces in their names breaking their argument list. If their internals are otherwise well written, such as if they are written in shell but quote arguments, they will work better after this change than before. If they are not well written, then there is a slight chance of breakage depending on the details of the code but they will already be fairly broken by the split filenames.
Core dump handlers that are aware of this Linux kernel issue will be placing %e or %E as the last item in their core_pattern and then aggregating all of the remaining arguments into one, separated by spaces. Alternatively they will be obtaining the filename via other methods. Both of these will be compatible with the new arrangement.
A side effect from this change is that unknown template types (for example %z) result in an empty argument to the dump handler instead of the argument being dropped. This is a desired change as:
It is easier for dump handlers to process empty arguments than dropped ones, especially if they are written in shell or don't pass each template item with a preceding command-line option in order to differentiate between individual template types. Most core_patterns in the wild do not use options so they can confuse different template types (especially numeric ones) if an earlier one gets dropped in old kernels. If the kernel introduces a new template type and a core_pattern uses it, the core dump handler might not expect that the argument can be dropped in old kernels.
For example, this can result in security issues when %d is dropped in old kernels. This happened with the corekeeper package in Debian and resulted in the interface between corekeeper and Linux having to be rewritten to use command-line options to differentiate between template types.
The core_pattern for most core dump handlers is written by the handler author who would generally not insert unknown template types so this change should be compatible with all the core dump handlers that exist.
Link: http://lkml.kernel.org/r/20190528051142.24939-1-pabs3@bonedaddy.net Fixes: 74aadce98605 ("core_pattern: allow passing of arguments to user mode helper when core_pattern is a pipe") Signed-off-by: Paul Wise pabs3@bonedaddy.net Reported-by: Jakub Wilk jwilk@jwilk.net [https://bugs.debian.org/924398] Reported-by: Paul Wise pabs3@bonedaddy.net [https://lore.kernel.org/linux-fsdevel/c8b7ecb8508895bf4adb62a748e2ea2c718545...] Suggested-by: Jakub Wilk jwilk@jwilk.net Acked-by: Neil Horman nhorman@tuxdriver.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/coredump.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-)
diff --git a/fs/coredump.c b/fs/coredump.c index e42e17e55bfd5..b1ea7dfbd1494 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -7,6 +7,7 @@ #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/swap.h> +#include <linux/ctype.h> #include <linux/string.h> #include <linux/init.h> #include <linux/pagemap.h> @@ -187,11 +188,13 @@ static int cn_print_exe_file(struct core_name *cn) * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(struct core_name *cn, struct coredump_params *cprm) +static int format_corename(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); + bool was_space = false; int pid_in_pattern = 0; int err = 0;
@@ -201,12 +204,35 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) return -ENOMEM; cn->corename[0] = '\0';
- if (ispipe) + if (ispipe) { + int argvs = sizeof(core_pattern) / 2; + (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); + if (!(*argv)) + return -ENOMEM; + (*argv)[(*argc)++] = 0; ++pat_ptr; + }
/* Repeat as long as we have more pattern to process and more output space */ while (*pat_ptr) { + /* + * Split on spaces before doing template expansion so that + * %e and %E don't get split if they have spaces in them + */ + if (ispipe) { + if (isspace(*pat_ptr)) { + was_space = true; + pat_ptr++; + continue; + } else if (was_space) { + was_space = false; + err = cn_printf(cn, "%c", '\0'); + if (err) + return err; + (*argv)[(*argc)++] = cn->used; + } + } if (*pat_ptr != '%') { err = cn_printf(cn, "%c", *pat_ptr++); } else { @@ -546,6 +572,8 @@ void do_coredump(const kernel_siginfo_t *siginfo) struct cred *cred; int retval = 0; int ispipe; + size_t *argv = NULL; + int argc = 0; struct files_struct *displaced; /* require nonrelative corefile path and be extra careful */ bool need_suid_safe = false; @@ -592,9 +620,10 @@ void do_coredump(const kernel_siginfo_t *siginfo)
old_cred = override_creds(cred);
- ispipe = format_corename(&cn, &cprm); + ispipe = format_corename(&cn, &cprm, &argv, &argc);
if (ispipe) { + int argi; int dump_count; char **helper_argv; struct subprocess_info *sub_info; @@ -637,12 +666,16 @@ void do_coredump(const kernel_siginfo_t *siginfo) goto fail_dropcount; }
- helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); + helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), + GFP_KERNEL); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); goto fail_dropcount; } + for (argi = 0; argi < argc; argi++) + helper_argv[argi] = cn.corename + argv[argi]; + helper_argv[argi] = NULL;
retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], @@ -652,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
- argv_free(helper_argv); + kfree(helper_argv); if (retval) { printk(KERN_INFO "Core dump to |%s pipe failed\n", cn.corename); @@ -766,6 +799,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (ispipe) atomic_dec(&core_dump_count); fail_unlock: + kfree(argv); kfree(cn.corename); coredump_finish(mm, core_dumped); revert_creds(old_cred);
On Tue, 2019-08-06 at 17:33 -0400, Sasha Levin wrote:
From: Paul Wise pabs3@bonedaddy.net
[ Upstream commit 315c69261dd3fa12dbc830d4fa00d1fad98d3b03 ]
The patch changes the behaviour of the interface between the Linux kernel and userspace core dump handlers. The previous behaviour was unlikely to be depended on by any core dump handler but it is still a behaviour change, so I think it would be best to keep it out of the stable branches and would prefer to have folks encounter the change as Linux distros etc roll out 5.3 and later into their dev releases.
We discussed this on #kernelnewbies a while ago and gregkh agreed that it should stew a while longer before reaching any stable releases.
In addition if it gets backported to stable releases, my patch for core(5) from man-pages will have to get more complicated :)
On Wed, Aug 07, 2019 at 09:41:46AM +0800, Paul Wise wrote:
On Tue, 2019-08-06 at 17:33 -0400, Sasha Levin wrote:
From: Paul Wise pabs3@bonedaddy.net
[ Upstream commit 315c69261dd3fa12dbc830d4fa00d1fad98d3b03 ]
The patch changes the behaviour of the interface between the Linux kernel and userspace core dump handlers. The previous behaviour was unlikely to be depended on by any core dump handler but it is still a behaviour change, so I think it would be best to keep it out of the stable branches and would prefer to have folks encounter the change as Linux distros etc roll out 5.3 and later into their dev releases.
We discussed this on #kernelnewbies a while ago and gregkh agreed that it should stew a while longer before reaching any stable releases.
In addition if it gets backported to stable releases, my patch for core(5) from man-pages will have to get more complicated :)
I'll just drop it and let Greg deal with it then :)
-- Thanks, Sasha
From: Qian Cai cai@lca.pw
[ Upstream commit cbedfe11347fe418621bd188d58a206beb676218 ]
Commit d66acc39c7ce ("bitops: Optimise get_order()") introduced a compilation warning because "rx_frag_size" is an "ushort" while PAGE_SHIFT here is 16.
The commit changed the get_order() to be a multi-line macro where compilers insist to check all statements in the macro even when __builtin_constant_p(rx_frag_size) will return false as "rx_frag_size" is a module parameter.
In file included from ./arch/powerpc/include/asm/page_64.h:107, from ./arch/powerpc/include/asm/page.h:242, from ./arch/powerpc/include/asm/mmu.h:132, from ./arch/powerpc/include/asm/lppaca.h:47, from ./arch/powerpc/include/asm/paca.h:17, from ./arch/powerpc/include/asm/current.h:13, from ./include/linux/thread_info.h:21, from ./arch/powerpc/include/asm/processor.h:39, from ./include/linux/prefetch.h:15, from drivers/net/ethernet/emulex/benet/be_main.c:14: drivers/net/ethernet/emulex/benet/be_main.c: In function 'be_rx_cqs_create': ./include/asm-generic/getorder.h:54:9: warning: comparison is always true due to limited range of data type [-Wtype-limits] (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ ^ drivers/net/ethernet/emulex/benet/be_main.c:3138:33: note: in expansion of macro 'get_order' adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE; ^~~~~~~~~
Fix it by moving all of this multi-line macro into a proper function, and killing __get_order() off.
[akpm@linux-foundation.org: remove __get_order() altogether] [cai@lca.pw: v2] Link: http://lkml.kernel.org/r/1564000166-31428-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/1563914986-26502-1-git-send-email-cai@lca.pw Fixes: d66acc39c7ce ("bitops: Optimise get_order()") Signed-off-by: Qian Cai cai@lca.pw Reviewed-by: Nathan Chancellor natechancellor@gmail.com Cc: David S. Miller davem@davemloft.net Cc: Arnd Bergmann arnd@arndb.de Cc: David Howells dhowells@redhat.com Cc: Jakub Jelinek jakub@redhat.com Cc: Nick Desaulniers ndesaulniers@google.com Cc: Bill Wendling morbo@google.com Cc: James Y Knight jyknight@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/asm-generic/getorder.h | 50 ++++++++++++++-------------------- 1 file changed, 20 insertions(+), 30 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index c64bea7a52beb..e9f20b813a699 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -7,24 +7,6 @@ #include <linux/compiler.h> #include <linux/log2.h>
-/* - * Runtime evaluation of get_order() - */ -static inline __attribute_const__ -int __get_order(unsigned long size) -{ - int order; - - size--; - size >>= PAGE_SHIFT; -#if BITS_PER_LONG == 32 - order = fls(size); -#else - order = fls64(size); -#endif - return order; -} - /** * get_order - Determine the allocation order of a memory size * @size: The size for which to get the order @@ -43,19 +25,27 @@ int __get_order(unsigned long size) * to hold an object of the specified size. * * The result is undefined if the size is 0. - * - * This function may be used to initialise variables with compile time - * evaluations of constants. */ -#define get_order(n) \ -( \ - __builtin_constant_p(n) ? ( \ - ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ - (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ - ilog2((n) - 1) - PAGE_SHIFT + 1) \ - ) : \ - __get_order(n) \ -) +static inline __attribute_const__ int get_order(unsigned long size) +{ + if (__builtin_constant_p(size)) { + if (!size) + return BITS_PER_LONG - PAGE_SHIFT; + + if (size < (1UL << PAGE_SHIFT)) + return 0; + + return ilog2((size) - 1) - PAGE_SHIFT + 1; + } + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + return fls(size); +#else + return fls64(size); +#endif +}
#endif /* __ASSEMBLY__ */
From: Nayna Jain nayna@linux.ibm.com
[ Upstream commit fa4f99c05320eb28bf6ba52a9adf64d888da1f9e ]
The nr_allocated_banks and allocated banks are initialized as part of tpm_chip_register. Currently, this is done as part of auto startup function. However, some drivers, like the ibm vtpm driver, do not run auto startup during initialization. This results in uninitialized memory issue and causes a kernel panic during boot.
This patch moves the pcr allocation outside the auto startup function into tpm_chip_register. This ensures that allocated banks are initialized in any case.
Fixes: 879b589210a9 ("tpm: retrieve digest size of unknown algorithms with PCR read") Reported-by: Michal Suchanek msuchanek@suse.de Signed-off-by: Nayna Jain nayna@linux.ibm.com Reviewed-by: Mimi Zohar zohar@linux.ibm.com Tested-by: Sachin Sant sachinp@linux.vnet.ibm.com Tested-by: Michal Suchánek msuchanek@suse.de Reviewed-by: Jarkko Sakkinen jarkko.sakkinen@linux.intel.com Signed-off-by: Jarkko Sakkinen jarkko.sakkinen@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/char/tpm/tpm-chip.c | 20 ++++++++++++++++++++ drivers/char/tpm/tpm.h | 2 ++ drivers/char/tpm/tpm1-cmd.c | 36 ++++++++++++++++++++++++------------ drivers/char/tpm/tpm2-cmd.c | 6 +----- 4 files changed, 47 insertions(+), 17 deletions(-)
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index d47ad10a35fe3..1d3c25831604a 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -545,6 +545,20 @@ static int tpm_add_hwrng(struct tpm_chip *chip) return hwrng_register(&chip->hwrng); }
+static int tpm_get_pcr_allocation(struct tpm_chip *chip) +{ + int rc; + + rc = (chip->flags & TPM_CHIP_FLAG_TPM2) ? + tpm2_get_pcr_allocation(chip) : + tpm1_get_pcr_allocation(chip); + + if (rc > 0) + return -ENODEV; + + return rc; +} + /* * tpm_chip_register() - create a character device for the TPM chip * @chip: TPM chip to use. @@ -564,6 +578,12 @@ int tpm_chip_register(struct tpm_chip *chip) if (rc) return rc; rc = tpm_auto_startup(chip); + if (rc) { + tpm_chip_stop(chip); + return rc; + } + + rc = tpm_get_pcr_allocation(chip); tpm_chip_stop(chip); if (rc) return rc; diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index e503ffc3aa39c..a7fea3e0ca86a 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -394,6 +394,7 @@ int tpm1_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf); ssize_t tpm1_getcap(struct tpm_chip *chip, u32 subcap_id, cap_t *cap, const char *desc, size_t min_cap_length); int tpm1_get_random(struct tpm_chip *chip, u8 *out, size_t max); +int tpm1_get_pcr_allocation(struct tpm_chip *chip); unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal); int tpm_pm_suspend(struct device *dev); int tpm_pm_resume(struct device *dev); @@ -449,6 +450,7 @@ int tpm2_unseal_trusted(struct tpm_chip *chip, ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, const char *desc);
+ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip); int tpm2_auto_startup(struct tpm_chip *chip); void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type); unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal); diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c index faacbe1ffa1a9..149e953ca3699 100644 --- a/drivers/char/tpm/tpm1-cmd.c +++ b/drivers/char/tpm/tpm1-cmd.c @@ -699,18 +699,6 @@ int tpm1_auto_startup(struct tpm_chip *chip) goto out; }
- chip->allocated_banks = kcalloc(1, sizeof(*chip->allocated_banks), - GFP_KERNEL); - if (!chip->allocated_banks) { - rc = -ENOMEM; - goto out; - } - - chip->allocated_banks[0].alg_id = TPM_ALG_SHA1; - chip->allocated_banks[0].digest_size = hash_digest_size[HASH_ALGO_SHA1]; - chip->allocated_banks[0].crypto_id = HASH_ALGO_SHA1; - chip->nr_allocated_banks = 1; - return rc; out: if (rc > 0) @@ -779,3 +767,27 @@ int tpm1_pm_suspend(struct tpm_chip *chip, u32 tpm_suspend_pcr) return rc; }
+/** + * tpm1_get_pcr_allocation() - initialize the allocated bank + * @chip: TPM chip to use. + * + * The function initializes the SHA1 allocated bank to extend PCR + * + * Return: + * * 0 on success, + * * < 0 on error. + */ +int tpm1_get_pcr_allocation(struct tpm_chip *chip) +{ + chip->allocated_banks = kcalloc(1, sizeof(*chip->allocated_banks), + GFP_KERNEL); + if (!chip->allocated_banks) + return -ENOMEM; + + chip->allocated_banks[0].alg_id = TPM_ALG_SHA1; + chip->allocated_banks[0].digest_size = hash_digest_size[HASH_ALGO_SHA1]; + chip->allocated_banks[0].crypto_id = HASH_ALGO_SHA1; + chip->nr_allocated_banks = 1; + + return 0; +} diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index d103545e40550..ba9acae83bff1 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -840,7 +840,7 @@ struct tpm2_pcr_selection { u8 pcr_select[3]; } __packed;
-static ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip) +ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip) { struct tpm2_pcr_selection pcr_selection; struct tpm_buf buf; @@ -1040,10 +1040,6 @@ int tpm2_auto_startup(struct tpm_chip *chip) goto out; }
- rc = tpm2_get_pcr_allocation(chip); - if (rc) - goto out; - rc = tpm2_get_cc_attrs_tbl(chip);
out:
linux-stable-mirror@lists.linaro.org