The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 031495635b4668f94e964e037ca93d0d38bfde58 Mon Sep 17 00:00:00 2001
From: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Date: Wed, 2 Mar 2022 09:38:09 -0800
Subject: [PATCH] arm64: Do not defer reserve_crashkernel() for platforms with
no DMA memory zones
The following patches resulted in deferring crash kernel reservation to
mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU),
in particular Raspberry Pi 4.
commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32")
commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges")
commit 0a30c53573b0 ("arm64: mm: Move reserve_crashkernel() into mem_init()")
commit 2687275a5843 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required")
Above changes introduced boot slowdown due to linear map creation for
all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1]. The proposed
changes restore crash kernel reservation to earlier behavior thus avoids
slow boot, particularly for platforms with IOMMU (no DMA memory zones).
Tested changes to confirm no ~150ms boot slowdown on our SoC with IOMMU
and 8GB memory. Also tested with ZONE_DMA and/or ZONE_DMA32 configs to confirm
no regression to deferring scheme of crash kernel memory reservation.
In both cases successfully collected kernel crash dump.
[1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.micr…
Signed-off-by: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.mi…
[will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build]
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771..919be440494f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
* otherwise it is empty.
+ *
+ * Memory reservation for crash kernel either done early or deferred
+ * depending on DMA memory zones configs (ZONE_DMA) --
+ *
+ * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
+ * here instead of max_zone_phys(). This lets early reservation of
+ * crash kernel memory which has a dependency on arm64_dma_phys_limit.
+ * Reserving memory early for crash kernel allows linear creation of block
+ * mappings (greater than page-granularity) for all the memory bank rangs.
+ * In this scheme a comparatively quicker boot is observed.
+ *
+ * If ZONE_DMA configs are defined, crash kernel memory reservation
+ * is delayed until DMA zone memory range size initilazation performed in
+ * zone_sizes_init(). The defer is necessary to steer clear of DMA zone
+ * memory range to avoid overlap allocation. So crash kernel memory boundaries
+ * are not known when mapping all bank memory ranges, which otherwise means
+ * not possible to exclude crash kernel range from creating block mappings
+ * so page-granularity mappings are created for the entire memory range.
+ * Hence a slightly slower boot is observed.
+ *
+ * Note: Page-granularity mapppings are necessary for crash kernel memory
+ * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
*/
-phys_addr_t arm64_dma_phys_limit __ro_after_init;
+#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
+phys_addr_t __ro_after_init arm64_dma_phys_limit;
+#else
+const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
+#endif
#ifdef CONFIG_KEXEC_CORE
/*
@@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
- if (!arm64_dma_phys_limit)
- arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
+
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
@@ -361,7 +388,8 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- reserve_crashkernel();
+ if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
memblock_dump_all();
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8..ed21bf83d0b7 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
+ if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -528,6 +528,17 @@ static void __init map_mem(pgd_t *pgdp)
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA) ||
+ IS_ENABLED(CONFIG_ZONE_DMA32))
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+ else if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+#endif
+
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -554,6 +565,25 @@ static void __init map_mem(pgd_t *pgdp)
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * in page granularity and put back unused memory to buddy system
+ * through /sys/kernel/kexec_crash_size interface.
+ */
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map &&
+ !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (crashk_res.end) {
+ __map_memblock(pgdp, crashk_res.start,
+ crashk_res.end + 1,
+ PAGE_KERNEL,
+ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+ }
+#endif
}
void mark_rodata_ro(void)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 031495635b4668f94e964e037ca93d0d38bfde58 Mon Sep 17 00:00:00 2001
From: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Date: Wed, 2 Mar 2022 09:38:09 -0800
Subject: [PATCH] arm64: Do not defer reserve_crashkernel() for platforms with
no DMA memory zones
The following patches resulted in deferring crash kernel reservation to
mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU),
in particular Raspberry Pi 4.
commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32")
commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges")
commit 0a30c53573b0 ("arm64: mm: Move reserve_crashkernel() into mem_init()")
commit 2687275a5843 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required")
Above changes introduced boot slowdown due to linear map creation for
all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1]. The proposed
changes restore crash kernel reservation to earlier behavior thus avoids
slow boot, particularly for platforms with IOMMU (no DMA memory zones).
Tested changes to confirm no ~150ms boot slowdown on our SoC with IOMMU
and 8GB memory. Also tested with ZONE_DMA and/or ZONE_DMA32 configs to confirm
no regression to deferring scheme of crash kernel memory reservation.
In both cases successfully collected kernel crash dump.
[1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.micr…
Signed-off-by: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.mi…
[will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build]
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771..919be440494f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
* otherwise it is empty.
+ *
+ * Memory reservation for crash kernel either done early or deferred
+ * depending on DMA memory zones configs (ZONE_DMA) --
+ *
+ * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
+ * here instead of max_zone_phys(). This lets early reservation of
+ * crash kernel memory which has a dependency on arm64_dma_phys_limit.
+ * Reserving memory early for crash kernel allows linear creation of block
+ * mappings (greater than page-granularity) for all the memory bank rangs.
+ * In this scheme a comparatively quicker boot is observed.
+ *
+ * If ZONE_DMA configs are defined, crash kernel memory reservation
+ * is delayed until DMA zone memory range size initilazation performed in
+ * zone_sizes_init(). The defer is necessary to steer clear of DMA zone
+ * memory range to avoid overlap allocation. So crash kernel memory boundaries
+ * are not known when mapping all bank memory ranges, which otherwise means
+ * not possible to exclude crash kernel range from creating block mappings
+ * so page-granularity mappings are created for the entire memory range.
+ * Hence a slightly slower boot is observed.
+ *
+ * Note: Page-granularity mapppings are necessary for crash kernel memory
+ * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
*/
-phys_addr_t arm64_dma_phys_limit __ro_after_init;
+#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
+phys_addr_t __ro_after_init arm64_dma_phys_limit;
+#else
+const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
+#endif
#ifdef CONFIG_KEXEC_CORE
/*
@@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
- if (!arm64_dma_phys_limit)
- arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
+
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
@@ -361,7 +388,8 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- reserve_crashkernel();
+ if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
memblock_dump_all();
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8..ed21bf83d0b7 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
+ if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -528,6 +528,17 @@ static void __init map_mem(pgd_t *pgdp)
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA) ||
+ IS_ENABLED(CONFIG_ZONE_DMA32))
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+ else if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+#endif
+
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -554,6 +565,25 @@ static void __init map_mem(pgd_t *pgdp)
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * in page granularity and put back unused memory to buddy system
+ * through /sys/kernel/kexec_crash_size interface.
+ */
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map &&
+ !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (crashk_res.end) {
+ __map_memblock(pgdp, crashk_res.start,
+ crashk_res.end + 1,
+ PAGE_KERNEL,
+ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+ }
+#endif
}
void mark_rodata_ro(void)
From: Ben Dooks <ben.dooks(a)codethink.co.uk>
commit a382c757ec5ef83137a86125f43a4c43dc2ab50b upstream.
The fu740 PCIe core does not probe any devices on the SiFive Unmatched
board without this fix (or having U-Boot explicitly start the PCIe via
either boot-script or user command). The fix is to start the link at
2.5GT/s speeds and once the link is up then change the maximum speed back
to the default.
The U-Boot driver claims to set the link-speed to 2.5GT/s to get the probe
to work (and U-Boot does print link up at 2.5GT/s) in the following code:
https://source.denx.de/u-boot/u-boot/-/blob/master/drivers/pci/pcie_dw_sifi…
Link: https://lore.kernel.org/r/20220318152430.526320-1-ben.dooks@codethink.co.uk
Signed-off-by: Ben Dooks <ben.dooks(a)codethink.co.uk>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Acked-by: Palmer Dabbelt <palmer(a)rivosinc.com>
Signed-off-by: Dimitri John Ledkov <dimitri.ledkov(a)canonical.com>
---
Please apply this patch to v5.15+ stable trees which fixes PCIe on
the very popular SiFive Unmatched RISC-V board.
drivers/pci/controller/dwc/pcie-fu740.c | 51 ++++++++++++++++++++++++-
1 file changed, 50 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/controller/dwc/pcie-fu740.c b/drivers/pci/controller/dwc/pcie-fu740.c
index 00cde9a248b5..78d002be4f82 100644
--- a/drivers/pci/controller/dwc/pcie-fu740.c
+++ b/drivers/pci/controller/dwc/pcie-fu740.c
@@ -181,10 +181,59 @@ static int fu740_pcie_start_link(struct dw_pcie *pci)
{
struct device *dev = pci->dev;
struct fu740_pcie *afp = dev_get_drvdata(dev);
+ u8 cap_exp = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+ int ret;
+ u32 orig, tmp;
+
+ /*
+ * Force 2.5GT/s when starting the link, due to some devices not
+ * probing at higher speeds. This happens with the PCIe switch
+ * on the Unmatched board when U-Boot has not initialised the PCIe.
+ * The fix in U-Boot is to force 2.5GT/s, which then gets cleared
+ * by the soft reset done by this driver.
+ */
+ dev_dbg(dev, "cap_exp at %x\n", cap_exp);
+ dw_pcie_dbi_ro_wr_en(pci);
+
+ tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
+ orig = tmp & PCI_EXP_LNKCAP_SLS;
+ tmp &= ~PCI_EXP_LNKCAP_SLS;
+ tmp |= PCI_EXP_LNKCAP_SLS_2_5GB;
+ dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
/* Enable LTSSM */
writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_LTSSM_ENABLE);
- return 0;
+
+ ret = dw_pcie_wait_for_link(pci);
+ if (ret) {
+ dev_err(dev, "error: link did not start\n");
+ goto err;
+ }
+
+ tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
+ if ((tmp & PCI_EXP_LNKCAP_SLS) != orig) {
+ dev_dbg(dev, "changing speed back to original\n");
+
+ tmp &= ~PCI_EXP_LNKCAP_SLS;
+ tmp |= orig;
+ dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
+
+ tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+ tmp |= PORT_LOGIC_SPEED_CHANGE;
+ dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp);
+
+ ret = dw_pcie_wait_for_link(pci);
+ if (ret) {
+ dev_err(dev, "error: link did not start at new speed\n");
+ goto err;
+ }
+ }
+
+ ret = 0;
+err:
+ WARN_ON(ret); /* we assume that errors will be very rare */
+ dw_pcie_dbi_ro_wr_dis(pci);
+ return ret;
}
static int fu740_pcie_host_init(struct pcie_port *pp)
--
2.32.0