A test of the form:
while true; do modprobe -r cxl_pmem; modprobe cxl_pmem; done
May lead to a crash signature of the form:
BUG: unable to handle page fault for address: ffffffffc0660030
#PF: supervisor instruction fetch in kernel mode
#PF: error_code(0x0010) - not-present page
[..]
Workqueue: cxl_pmem 0xffffffffc0660030
RIP: 0010:0xffffffffc0660030
Code: Unable to access opcode bytes at RIP 0xffffffffc0660006.
[..]
Call Trace:
? process_one_work+0x4ec/0x9c0
? pwq_dec_nr_in_flight+0x100/0x100
? rwlock_bug.part.0+0x60/0x60
? worker_thread+0x2eb/0x700
In that report the 0xffffffffc0660030 address corresponds to the former
function address of cxl_nvb_update_state() from a previous load of the
module, not the current address. Fix that by arranging for ->state_work
in the 'struct cxl_nvdimm_bridge' object to be reinitialized on cxl_pmem
module reload.
Details:
Recall that CXL subsystem wants to link a CXL memory expander device to
an NVDIMM sub-hierarchy when both a persistent memory range has been
registered by the CXL platform driver (cxl_acpi) *and* when that CXL
memory expander has published persistent memory capacity (Get Partition
Info). To this end the cxl_nvdimm_bridge driver arranges to rescan the
CXL bus when either of those conditions change. The helper
bus_rescan_devices() can not be called underneath the device_lock() for
any device on that bus, so the cxl_nvdimm_bridge driver uses a workqueue
for the rescan.
Typically a driver allocates driver data to hold a 'struct work_struct'
for a driven device, but for a workqueue that may run after ->remove()
returns, driver data will have been freed. The 'struct
cxl_nvdimm_bridge' object holds the state and work_struct directly.
Unfortunately it was only arranging for that infrastructure to be
initialized once per device creation rather than the necessary once per
workqueue (cxl_pmem_wq) creation.
Introduce is_cxl_nvdimm_bridge() and cxl_nvdimm_bridge_reset() in
support of invalidating stale references to a recently destroyed
cxl_pmem_wq.
Cc: <stable(a)vger.kernel.org>
Fixes: 8fdcb1704f61 ("cxl/pmem: Add initial infrastructure for pmem support")
Reported-by: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/cxl/core/pmem.c | 8 +++++++-
drivers/cxl/cxl.h | 8 ++++++++
drivers/cxl/pmem.c | 29 +++++++++++++++++++++++++++--
3 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 76a4fa39834c..cc402cb7a905 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -51,10 +51,16 @@ struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm_bridge, CXL);
-__mock int match_nvdimm_bridge(struct device *dev, const void *data)
+bool is_cxl_nvdimm_bridge(struct device *dev)
{
return dev->type == &cxl_nvdimm_bridge_type;
}
+EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL);
+
+__mock int match_nvdimm_bridge(struct device *dev, const void *data)
+{
+ return is_cxl_nvdimm_bridge(dev);
+}
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
{
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 5e2e93451928..ca979ee11017 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -221,6 +221,13 @@ struct cxl_decoder {
};
+/**
+ * enum cxl_nvdimm_brige_state - state machine for managing bus rescans
+ * @CXL_NVB_NEW: Set at bridge create and after cxl_pmem_wq is destroyed
+ * @CXL_NVB_DEAD: Set at brige unregistration to preclude async probing
+ * @CXL_NVB_ONLINE: Target state after successful ->probe()
+ * @CXL_NVB_OFFLINE: Target state after ->remove() or failed ->probe()
+ */
enum cxl_nvdimm_brige_state {
CXL_NVB_NEW,
CXL_NVB_DEAD,
@@ -333,6 +340,7 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_port *port);
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
+bool is_cxl_nvdimm_bridge(struct device *dev);
int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd);
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd);
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 17e82ae90456..b65a272a2d6d 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -315,6 +315,31 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = {
.id = CXL_DEVICE_NVDIMM_BRIDGE,
};
+/*
+ * Return all bridges to the CXL_NVB_NEW state to invalidate any
+ * ->state_work referring to the now destroyed cxl_pmem_wq.
+ */
+static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
+{
+ struct cxl_nvdimm_bridge *cxl_nvb;
+
+ if (!is_cxl_nvdimm_bridge(dev))
+ return 0;
+
+ cxl_nvb = to_cxl_nvdimm_bridge(dev);
+ device_lock(dev);
+ cxl_nvb->state = CXL_NVB_NEW;
+ device_unlock(dev);
+
+ return 0;
+}
+
+static void destroy_cxl_pmem_wq(void)
+{
+ destroy_workqueue(cxl_pmem_wq);
+ bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
+}
+
static __init int cxl_pmem_init(void)
{
int rc;
@@ -340,7 +365,7 @@ static __init int cxl_pmem_init(void)
err_nvdimm:
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
- destroy_workqueue(cxl_pmem_wq);
+ destroy_cxl_pmem_wq();
return rc;
}
@@ -348,7 +373,7 @@ static __exit void cxl_pmem_exit(void)
{
cxl_driver_unregister(&cxl_nvdimm_driver);
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
- destroy_workqueue(cxl_pmem_wq);
+ destroy_cxl_pmem_wq();
}
MODULE_LICENSE("GPL v2");
From: Mark Rutland <mark.rutland(a)arm.com>
Since ARMv8.0 the upper 32 bits of ESR_ELx have been RES0, and recently
some of the upper bits gained a meaning and can be non-zero. For
example, when FEAT_LS64 is implemented, ESR_ELx[36:32] contain ISS2,
which for an ST64BV or ST64BV0 can be non-zero. This can be seen in ARM
DDI 0487G.b, page D13-3145, section D13.2.37.
Generally, we must not rely on RES0 bit remaining zero in future, and
when extracting ESR_ELx.EC we must mask out all other bits.
All C code uses the ESR_ELx_EC() macro, which masks out the irrelevant
bits, and therefore no alterations are required to C code to avoid
consuming irrelevant bits.
In a couple of places the KVM assembly extracts ESR_ELx.EC using LSR on
an X register, and so could in theory consume previously RES0 bits. In
both cases this is for comparison with EC values ESR_ELx_EC_HVC32 and
ESR_ELx_EC_HVC64, for which the upper bits of ESR_ELx must currently be
zero, but this could change in future.
This patch adjusts the KVM vectors to use UBFX rather than LSR to
extract ESR_ELx.EC, ensuring these are robust to future additions to
ESR_ELx.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mark Rutland <mark.rutland(a)arm.com>
Cc: Alexandru Elisei <alexandru.elisei(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Acked-by: Will Deacon <will(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20211103110545.4613-1-mark.rutland@arm.com
---
arch/arm64/include/asm/esr.h | 1 +
arch/arm64/kvm/hyp/hyp-entry.S | 2 +-
arch/arm64/kvm/hyp/nvhe/host.S | 2 +-
3 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 29f97eb3dad4..8f59bbeba7a7 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -68,6 +68,7 @@
#define ESR_ELx_EC_MAX (0x3F)
#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_WIDTH (6)
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9aa9b73475c9..b6b6801d96d5 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -44,7 +44,7 @@
el1_sync: // Guest trapped into EL2
mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 0c6116d34e18..3d613e721a75 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -141,7 +141,7 @@ SYM_FUNC_END(__host_hvc)
.L__vect_start\@:
stp x0, x1, [sp, #-16]!
mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
b.eq __host_hvc
b __host_exit
--
2.30.2
When reporting IOMAP_INLINE extents, filesystems set iomap->length to
the length of iomap->inline_data. For reading that into the page cache,
function iomap_read_inline_data copies the inline data, zeroes out the
rest of the page, and marks the entire page up-to-date.
Before commit 740499c78408 ("iomap: fix the iomap_readpage_actor return
value for inline data"), when hitting an IOMAP_INLINE extent,
iomap_readpage_actor would report having read the entire page. Since
then, it only reports having read the inline data (iomap->length).
This will force iomap_readpage into another iteration, and the
filesystem will report an unaligned hole after the IOMAP_INLINE extent.
But iomap_readpage_actor (now iomap_readpage_iter) isn't prepared to
deal with unaligned extents, it will get things wrong on filesystems
with a block size smaller than the page size, and we'll eventually run
into the following warning in iomap_iter_advance:
WARN_ON_ONCE(iter->processed > iomap_length(iter));
Fix that by changing iomap_readpage_iter back to report that we've read
the entire page, which avoids having to deal with unaligned extents. To
prevent iomap from complaining about running past the end of the extent,
fix up the extent size as well.
Fixes: 740499c78408 ("iomap: fix the iomap_readpage_actor return value for inline data")
Cc: stable(a)vger.kernel.org # v5.15+
Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com>
---
fs/iomap/buffered-io.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 1753c26c8e76..de3fcd2522a2 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -244,10 +244,10 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
pos >= i_size_read(iter->inode);
}
-static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
+static loff_t iomap_readpage_iter(struct iomap_iter *iter,
struct iomap_readpage_ctx *ctx, loff_t offset)
{
- const struct iomap *iomap = &iter->iomap;
+ struct iomap *iomap = &iter->iomap;
loff_t pos = iter->pos + offset;
loff_t length = iomap_length(iter) - offset;
struct page *page = ctx->cur_page;
@@ -256,8 +256,15 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
unsigned poff, plen;
sector_t sector;
- if (iomap->type == IOMAP_INLINE)
- return min(iomap_read_inline_data(iter, page), length);
+ if (iomap->type == IOMAP_INLINE) {
+ /*
+ * The filesystem sets iomap->length to the size of the inline
+ * data. We're at the end of the file, so we know that the
+ * rest of the page needs to be zeroed out.
+ */
+ iomap->length = iomap_read_inline_data(iter, page);
+ return iomap->length;
+ }
/* zero post-eof blocks as the page may be mapped */
iop = iomap_page_create(iter->inode, page);
@@ -352,7 +359,7 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
}
EXPORT_SYMBOL_GPL(iomap_readpage);
-static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
+static loff_t iomap_readahead_iter(struct iomap_iter *iter,
struct iomap_readpage_ctx *ctx)
{
loff_t length = iomap_length(iter);
--
2.31.1
Hi Linus,
My testing has been failing for the last few days. Last good test was
with 6f2b76a4a384 and I started seeing the failure with ce840177930f5
where boot timeout.
Last good test - https://openqa.qa.codethink.co.uk/tests/323
Failing test - https://openqa.qa.codethink.co.uk/tests/335
Saw a similar issue with 5.10.79-rc1 today and bisect showed the
problem with 8615ff6dd1ac but that was already in the last good test I
had.
I will do a bisect tonight and let you know the result.
--
Regards
Sudip
Commit 6098475d4cb4 ("spi: Fix deadlock when adding SPI controllers on
SPI buses") introduced a per-controller mutex. But mutex_unlock() of
said lock is called after the controller is already freed:
spi_unregister_controller(ctlr)
-> put_device(&ctlr->dev)
-> spi_controller_release(dev)
-> mutex_unlock(&ctrl->add_lock)
Move the put_device() after the mutex_unlock().
Fixes: 6098475d4cb4 ("spi: Fix deadlock when adding SPI controllers on SPI buses")
Signed-off-by: Michael Walle <michael(a)walle.cc>
Reviewed-by: Uwe Kleine-König <u.kleine-koenig(a)pengutronix.de>
Reviewed-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v5.15
---
changes since RFC:
- fix call graph indendation in commit message
drivers/spi/spi.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index b23e675953e1..fdd530b150a7 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3099,12 +3099,6 @@ void spi_unregister_controller(struct spi_controller *ctlr)
device_del(&ctlr->dev);
- /* Release the last reference on the controller if its driver
- * has not yet been converted to devm_spi_alloc_master/slave().
- */
- if (!ctlr->devm_allocated)
- put_device(&ctlr->dev);
-
/* free bus id */
mutex_lock(&board_lock);
if (found == ctlr)
@@ -3113,6 +3107,12 @@ void spi_unregister_controller(struct spi_controller *ctlr)
if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
mutex_unlock(&ctlr->add_lock);
+
+ /* Release the last reference on the controller if its driver
+ * has not yet been converted to devm_spi_alloc_master/slave().
+ */
+ if (!ctlr->devm_allocated)
+ put_device(&ctlr->dev);
}
EXPORT_SYMBOL_GPL(spi_unregister_controller);
--
2.30.2
Hi Sjoerd,
On 05/11/2021 08:10, Sjoerd Simons wrote:
> Hostfs was not setting up the backing device information, which means it
> uses the noop bdi. The noop bdi does not have the writeback capability
> enabled, which in turns means dirty pages never got written back to
> storage.
>
> In other words programs using mmap to write to files on hostfs never
> actually got their data written out...
>
> Fix this by simply setting up the bdi with default settings as all the
> required code for writeback is already in place.
>
> Signed-off-by: Sjoerd Simons <sjoerd(a)collabora.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Christopher Obbard <chris.obbard(a)collabora.com>
...replying mainly as I wonder if adding the stable tag in a reply will
make the patch appear in stable (obviously once it is in mainline) ? :-)
>
> ---
>
> fs/hostfs/hostfs_kern.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
> index d5c9d886cd9f..ef481c3d9019 100644
> --- a/fs/hostfs/hostfs_kern.c
> +++ b/fs/hostfs/hostfs_kern.c
> @@ -924,6 +924,9 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
> sb->s_op = &hostfs_sbops;
> sb->s_d_op = &simple_dentry_operations;
> sb->s_maxbytes = MAX_LFS_FILESIZE;
> + err = super_setup_bdi(sb);
> + if (err)
> + goto out;
>
> /* NULL is printed as '(null)' by printf(): avoid that. */
> if (req_root == NULL)
>
The efifb and simplefb drivers just render to a pre-allocated frame buffer
and rely on the display hardware being initialized before the kernel boots.
But if another driver already probed correctly and registered a fbdev, the
generic drivers shouldn't be probed since an actual driver for the display
hardware is already present.
This is more likely to occur after commit d391c5827107 ("drivers/firmware:
move x86 Generic System Framebuffers support") since the "efi-framebuffer"
and "simple-framebuffer" platform devices are registered at a later time.
Link: https://lore.kernel.org/r/20211110200253.rfudkt3edbd3nsyj@lahvuun/
Fixes: d391c5827107 ("drivers/firmware: move x86 Generic System Framebuffers support")
Reported-by: Ilya Trukhanov <lahvuun(a)gmail.com>
Cc: <stable(a)vger.kernel.org> # 5.15.x
Signed-off-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
---
Changes in v3:
- Cc <stable(a)vger.kernel.org> since a Fixes: tag is not enough (gregkh).
Changes in v2:
- Add a Link: tag with a reference to the bug report (Thorsten Leemhuis).
- Add a comment explaining why the probe fails earlier (Daniel Vetter).
- Add a Fixes: tag for stable to pick the fix (Daniel Vetter).
- Add Daniel Vetter's Reviewed-by: tag.
- Improve the commit message and mention the culprit commit
drivers/video/fbdev/efifb.c | 11 +++++++++++
drivers/video/fbdev/simplefb.c | 11 +++++++++++
2 files changed, 22 insertions(+)
diff --git drivers/video/fbdev/efifb.c drivers/video/fbdev/efifb.c
index edca3703b964..ea42ba6445b2 100644
--- drivers/video/fbdev/efifb.c
+++ drivers/video/fbdev/efifb.c
@@ -351,6 +351,17 @@ static int efifb_probe(struct platform_device *dev)
char *option = NULL;
efi_memory_desc_t md;
+ /*
+ * Generic drivers must not be registered if a framebuffer exists.
+ * If a native driver was probed, the display hardware was already
+ * taken and attempting to use the system framebuffer is dangerous.
+ */
+ if (num_registered_fb > 0) {
+ dev_err(&dev->dev,
+ "efifb: a framebuffer is already registered\n");
+ return -EINVAL;
+ }
+
if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
return -ENODEV;
diff --git drivers/video/fbdev/simplefb.c drivers/video/fbdev/simplefb.c
index 62f0ded70681..b63074fd892e 100644
--- drivers/video/fbdev/simplefb.c
+++ drivers/video/fbdev/simplefb.c
@@ -407,6 +407,17 @@ static int simplefb_probe(struct platform_device *pdev)
struct simplefb_par *par;
struct resource *mem;
+ /*
+ * Generic drivers must not be registered if a framebuffer exists.
+ * If a native driver was probed, the display hardware was already
+ * taken and attempting to use the system framebuffer is dangerous.
+ */
+ if (num_registered_fb > 0) {
+ dev_err(&pdev->dev,
+ "simplefb: a framebuffer is already registered\n");
+ return -EINVAL;
+ }
+
if (fb_get_options("simplefb", NULL))
return -ENODEV;
--
2.33.1