From: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
The problem apparetly has been known since the conversion to
raw_spin_lock() (commit 4dbada2be460
("gpio: omap: use raw locks for locking")).
Symptom:
[ BUG: Invalid wait context ]
5.10.214
-----------------------------
swapper/1 is trying to lock:
(enable_lock){....}-{3:3}, at: clk_enable_lock
other info that might help us debug this:
context-{5:5}
2 locks held by swapper/1:
#0: (&dev->mutex){....}-{4:4}, at: device_driver_attach
#1: (&bank->lock){....}-{2:2}, at: omap_gpio_set_config
stack backtrace:
CPU: 0 PID: 1 Comm: swapper Not tainted 5.10.214
Hardware name: Generic AM33XX (Flattened Device Tree)
unwind_backtrace
show_stack
__lock_acquire
lock_acquire.part.0
_raw_spin_lock_irqsave
clk_enable_lock
clk_enable
omap_gpio_set_config
gpio_keys_setup_key
gpio_keys_probe
platform_drv_probe
really_probe
driver_probe_device
device_driver_attach
__driver_attach
bus_for_each_dev
bus_add_driver
driver_register
do_one_initcall
do_initcalls
kernel_init_freeable
kernel_init
Problematic spin_lock_irqsave(&enable_lock, ...) is being called by
clk_enable()/clk_disable() in omap2_set_gpio_debounce() and
omap_clear_gpio_debounce().
For omap2_set_gpio_debounce() it's possible to move
raw_spin_lock_irqsave(&bank->lock, ...) inside omap2_set_gpio_debounce()
so that the locks nest as follows:
clk_enable(bank->dbck)
raw_spin_lock_irqsave(&bank->lock, ...)
raw_spin_unlock_irqrestore()
clk_disable()
Two call-sites of omap_clear_gpio_debounce() are more convoluted, but one
can take the advantage of the nesting nature of clk_enable()/clk_disable(),
so that the inner clk_disable() becomes lockless:
clk_enable(bank->dbck) <-- only to clk_enable_lock()
raw_spin_lock_irqsave(&bank->lock, ...)
omap_clear_gpio_debounce()
clk_disable() <-- becomes lockless
raw_spin_unlock_irqrestore()
clk_disable()
Cc: stable(a)vger.kernel.org
Fixes: 4dbada2be460 ("gpio: omap: use raw locks for locking")
Signed-off-by: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
---
drivers/gpio/gpio-omap.c | 35 ++++++++++++++++++++++++++++++-----
1 file changed, 30 insertions(+), 5 deletions(-)
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 7ad4534054962..f9e502aa57753 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -181,6 +181,7 @@ static inline void omap_gpio_dbck_disable(struct gpio_bank *bank)
static int omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
unsigned debounce)
{
+ unsigned long flags;
u32 val;
u32 l;
bool enable = !!debounce;
@@ -196,13 +197,18 @@ static int omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
l = BIT(offset);
+ /*
+ * Ordering is important here: clk_enable() calls spin_lock_irqsave(),
+ * therefore it must be outside of the following raw_spin_lock_irqsave()
+ */
clk_enable(bank->dbck);
+ raw_spin_lock_irqsave(&bank->lock, flags);
+
writel_relaxed(debounce, bank->base + bank->regs->debounce);
val = omap_gpio_rmw(bank->base + bank->regs->debounce_en, l, enable);
bank->dbck_enable_mask = val;
- clk_disable(bank->dbck);
/*
* Enable debounce clock per module.
* This call is mandatory because in omap_gpio_request() when
@@ -217,6 +223,9 @@ static int omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
bank->context.debounce_en = val;
}
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
+ clk_disable(bank->dbck);
+
return 0;
}
@@ -647,6 +656,13 @@ static void omap_gpio_irq_shutdown(struct irq_data *d)
unsigned long flags;
unsigned offset = d->hwirq;
+ /*
+ * Enable the clock here so that the nested clk_disable() in the
+ * following omap_clear_gpio_debounce() is lockless
+ */
+ if (bank->dbck_flag)
+ clk_enable(bank->dbck);
+
raw_spin_lock_irqsave(&bank->lock, flags);
bank->irq_usage &= ~(BIT(offset));
omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
@@ -656,6 +672,9 @@ static void omap_gpio_irq_shutdown(struct irq_data *d)
omap_clear_gpio_debounce(bank, offset);
omap_disable_gpio_module(bank, offset);
raw_spin_unlock_irqrestore(&bank->lock, flags);
+
+ if (bank->dbck_flag)
+ clk_disable(bank->dbck);
}
static void omap_gpio_irq_bus_lock(struct irq_data *data)
@@ -827,6 +846,13 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
struct gpio_bank *bank = gpiochip_get_data(chip);
unsigned long flags;
+ /*
+ * Enable the clock here so that the nested clk_disable() in the
+ * following omap_clear_gpio_debounce() is lockless
+ */
+ if (bank->dbck_flag)
+ clk_enable(bank->dbck);
+
raw_spin_lock_irqsave(&bank->lock, flags);
bank->mod_usage &= ~(BIT(offset));
if (!LINE_USED(bank->irq_usage, offset)) {
@@ -836,6 +862,9 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
omap_disable_gpio_module(bank, offset);
raw_spin_unlock_irqrestore(&bank->lock, flags);
+ if (bank->dbck_flag)
+ clk_disable(bank->dbck);
+
pm_runtime_put(chip->parent);
}
@@ -913,15 +942,11 @@ static int omap_gpio_debounce(struct gpio_chip *chip, unsigned offset,
unsigned debounce)
{
struct gpio_bank *bank;
- unsigned long flags;
int ret;
bank = gpiochip_get_data(chip);
- raw_spin_lock_irqsave(&bank->lock, flags);
ret = omap2_set_gpio_debounce(bank, offset, debounce);
- raw_spin_unlock_irqrestore(&bank->lock, flags);
-
if (ret)
dev_info(chip->parent,
"Could not set line %u debounce to %u microseconds (%d)",
--
2.47.0
[BUG]
If submit_one_sector() failed inside extent_writepage_io() for sector
size < page size cases (e.g. 4K sector size and 64K page size), then
we can hit double ordered extent accounting error.
This should be very rare, as submit_one_sector() only fails when we
failed to grab the extent map, and such extent map should exist inside
the memory and have been pinned.
[CAUSE]
For example we have the following folio layout:
0 4K 32K 48K 60K 64K
|//| |//////| |///|
Where |///| is the dirty range we need to writeback. The 3 different
dirty ranges are submitted for regular COW.
Now we hit the following sequence:
- submit_one_sector() returned 0 for [0, 4K)
- submit_one_sector() returned 0 for [32K, 48K)
- submit_one_sector() returned error for [60K, 64K)
- btrfs_mark_ordered_io_finished() called for the whole folio
This will mark the following ranges as finished:
* [0, 4K)
* [32K, 48K)
Both ranges have their IO already submitted, this cleanup will
lead to double accounting.
* [60K, 64K)
That's the correct cleanup.
The only good news is, this error is only theoretical, as the target
extent map is always pinned, thus we should directly grab it from
memory, other than reading it from the disk.
[FIX]
Instead of calling btrfs_mark_ordered_io_finished() for the whole folio
range, which can touch ranges we should not touch, instead
move the error handling inside extent_writepage_io().
So that we can cleanup exact sectors that are ought to be submitted but
failed.
This provide much more accurate cleanup, avoiding the double accounting.
Cc: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/extent_io.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d619c4e148be..b74298c2c24f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1418,6 +1418,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
bool submitted_io = false;
+ bool error = false;
const u64 folio_start = folio_pos(folio);
u64 cur;
int bit;
@@ -1460,11 +1461,21 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
break;
}
ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size);
- if (ret < 0)
- goto out;
+ if (unlikely(ret < 0)) {
+ submit_one_bio(bio_ctrl);
+ /*
+ * Failed to grab the extent map which should be very rare.
+ * Since there is no bio submitted to finish the ordered
+ * extent, we have to manually finish this sector.
+ */
+ btrfs_mark_ordered_io_finished(inode, folio, cur,
+ fs_info->sectorsize, false);
+ error = true;
+ continue;
+ }
submitted_io = true;
}
-out:
+
/*
* If we didn't submitted any sector (>= i_size), folio dirty get
* cleared but PAGECACHE_TAG_DIRTY is not cleared (only cleared
@@ -1472,8 +1483,11 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
*
* Here we set writeback and clear for the range. If the full folio
* is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag.
+ *
+ * If we hit any error, the corresponding sector will still be dirty
+ * thus no need to clear PAGECACHE_TAG_DIRTY.
*/
- if (!submitted_io) {
+ if (!submitted_io && !error) {
btrfs_folio_set_writeback(fs_info, folio, start, len);
btrfs_folio_clear_writeback(fs_info, folio, start, len);
}
@@ -1493,7 +1507,6 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
{
struct inode *inode = folio->mapping->host;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- const u64 page_start = folio_pos(folio);
int ret;
size_t pg_offset;
loff_t i_size = i_size_read(inode);
@@ -1536,10 +1549,6 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
bio_ctrl->wbc->nr_to_write--;
- if (ret)
- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
- page_start, PAGE_SIZE, !ret);
-
done:
if (ret < 0)
mapping_set_error(folio->mapping, ret);
@@ -2320,11 +2329,8 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
if (ret == 1)
goto next_page;
- if (ret) {
- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
- cur, cur_len, !ret);
+ if (ret)
mapping_set_error(mapping, ret);
- }
btrfs_folio_end_lock(fs_info, folio, cur, cur_len);
if (ret < 0)
found_error = true;
--
2.47.0
[BUG]
There are several double accounting case, where the WARN_ON_ONCE() is
triggered inside can_finish_ordered_extent().
And all such cases points back to the btrfs_mark_ordered_io_finished()
call inside extent_writepage() when it hits some error.
[CAUSE]
With extra debug patches to show where the error is from, it turns out
to be btrfs_run_delalloc_range() can fail with -ENOSPC.
Such failure itself is already a symptom of some bad data/metadata space
reservation, but here we need to focus on the error handling part.
For example, we have the following dirty page layout (4K sector size and
4K page size):
0 16K 32K
|/////|/////|/////|/////|/////|/////|/////|/////|
Where the range [0, 32K) is dirty and we need to write all the 8 pages
back.
When handling the first page 0, we go the following sequence:
- btrfs_run_delalloc_range() for range [0, 32k)
We enter cow_file_range() for [0, 32K)
- btrfs_reserve_extent() only returned a 16K data extent.
This can be caused by fragmentation, and it's already an indication
we're almost running of space.
Now we have the following layout:
0 16K 32K
|<----- Reserved ------>|/////|/////|/////|/////|
The range [0, 16K) has ordered extent allocated.
- btrfs_reserve_extent() returned -ENOSPC
We really run out of space. But since we have reserved space
for range [0, 16K) we need to clean them up.
But that cleanup for ordered extent only happens inside
btrfs_run_delalloc_range().
- btrfs_run_delalloc_range() cleanup the reserved ordered extent
By calling btrfs_mark_ordered_io_finished() for range [0, 32K).
It will locate the ordered extent [0, 16K) and mark it as IOERR.
Also since the ordered extent is only 16K, we're finishing the whole
ordered extent.
Thus we call btrfs_queue_ordered_fn() to queue to finish the ordered
extent.
But still, the ordered extent [0, 16K) is still in the
btrfs_inode::ordered_tree.
- extent_writepage() cleanup the ordered extent inside the folio
We call btrfs_mark_ordered_io_finished() for range [0, 4K).
Since the finished ordered extent [0, 16K) is not yet removed (racy,
depends on when btrfs_finish_one_ordered() is called), if
btrfs_mark_ordered_io_finished() is called before
btrfs_finish_one_ordered(), we will double account and trigger the
warning inside can_finish_ordered_extent().
So the root cause is, we're relying on btrfs_mark_ordered_io_finished()
to handle ranges which is already cleaned up.
Unfortunately the bug dates back to the early days when
btrfs_mark_ordered_io_finished() is introduced as a no-brain choice for
error paths, but such no-brain solution just hides all the race and make
us less cautious when handling errors.
[FIX]
Instead of relying on the btrfs_mark_ordered_io_finished() call to
cleanup the whole folio range, record the last successfully ran delalloc
range.
And combined with bio_ctrl->submit_bitmap to properly clean up any newly
created ordered extents.
Since we have cleaned up the ordered extents in range, we should not
rely on the btrfs_mark_ordered_io_finished() inside extent_writepage()
anymore.
By this, we ensure btrfs_mark_ordered_io_finished() is only called once
when writepage_delalloc() failed.
Cc: stable(a)vger.kernel.org # 5.15+
Fixes: e65f152e4348 ("btrfs: refactor how we finish ordered extent io for endio functions")
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/extent_io.c | 37 ++++++++++++++++++++++++++++++++-----
1 file changed, 32 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 438974d4def4..d619c4e148be 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1167,6 +1167,12 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
* last delalloc end.
*/
u64 last_delalloc_end = 0;
+ /*
+ * Save the last successfully ran delalloc range end (exclusive).
+ * This is for error handling to avoid ranges with ordered extent created
+ * but no IO will be submitted due to error.
+ */
+ u64 last_finished = page_start;
u64 delalloc_start = page_start;
u64 delalloc_end = page_end;
u64 delalloc_to_write = 0;
@@ -1235,11 +1241,19 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
found_len = last_delalloc_end + 1 - found_start;
if (ret >= 0) {
+ /*
+ * Some delalloc range may be created by previous folios.
+ * Thus we still need to clean those range up during error
+ * handling.
+ */
+ last_finished = found_start;
/* No errors hit so far, run the current delalloc range. */
ret = btrfs_run_delalloc_range(inode, folio,
found_start,
found_start + found_len - 1,
wbc);
+ if (ret >= 0)
+ last_finished = found_start + found_len;
} else {
/*
* We've hit an error during previous delalloc range,
@@ -1274,8 +1288,21 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
delalloc_start = found_start + found_len;
}
- if (ret < 0)
+ /*
+ * It's possible we have some ordered extents created before we hit
+ * an error, cleanup non-async successfully created delalloc ranges.
+ */
+ if (unlikely(ret < 0)) {
+ unsigned int bitmap_size = min(
+ (last_finished - page_start) >> fs_info->sectorsize_bits,
+ fs_info->sectors_per_page);
+
+ for_each_set_bit(bit, &bio_ctrl->submit_bitmap, bitmap_size)
+ btrfs_mark_ordered_io_finished(inode, folio,
+ page_start + (bit << fs_info->sectorsize_bits),
+ fs_info->sectorsize, false);
return ret;
+ }
out:
if (last_delalloc_end)
delalloc_end = last_delalloc_end;
@@ -1509,13 +1536,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
bio_ctrl->wbc->nr_to_write--;
-done:
- if (ret) {
+ if (ret)
btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
page_start, PAGE_SIZE, !ret);
- mapping_set_error(folio->mapping, ret);
- }
+done:
+ if (ret < 0)
+ mapping_set_error(folio->mapping, ret);
/*
* Only unlock ranges that are submitted. As there can be some async
* submitted ranges inside the folio.
--
2.47.0
Hi,
This series fixes the UFS resume from suspend issue by marking the UFS PHY GDSCs
as ALWAYS_ON. Starting from SM8550, UFS PHY GDSCs doesn't support retention
state. So we should keep them always on so that they don't loose the state
during suspend.
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
---
Manivannan Sadhasivam (2):
clk: qcom: gcc-sm8550: Keep UFS PHY GDSCs ALWAYS_ON
clk: qcom: gcc-sm8650: Keep UFS PHY GDSCs ALWAYS_ON
drivers/clk/qcom/gcc-sm8550.c | 4 ++--
drivers/clk/qcom/gcc-sm8650.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
---
base-commit: 9852d85ec9d492ebef56dc5f229416c925758edc
change-id: 20241107-ufs-clk-fix-e49ee2097594
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Patches 1 and 2 of this series fix the issue reported by Hsin-Te Yuan
[1] where MT8192-based Chromebooks are not able to suspend/resume 10
times in a row. Either one of those patches on its own is enough to fix
the issue, but I believe both are desirable, so I've included them both
here.
Patches 3-5 fix unrelated issues that I've noticed while debugging.
Patch 3 fixes IRQ storms when the temperature sensors drop to 20
Celsius. Patches 4 and 5 are cleanups to prevent future issues.
To test this series, I've run 'rtcwake -m mem -d 60' 10 times in a row
on a MT8192-Asurada-Spherion-rev3 Chromebook and checked that the wakeup
happened 60 seconds later (+-5 seconds). I've repeated that test on 10
separate runs. Not once did the chromebook wake up early with the series
applied.
I've also checked that during those runs, the LVTS interrupt didn't
trigger even once, while before the series it would trigger a few times
per run, generally during boot or resume.
Finally, as a sanity check I've verified that the interrupts still work
by lowering the thermal trip point to 45 Celsius and running 'stress -c
8'. Indeed they still do, and the temperature showed by the
thermal_temperature ftrace event matched the expected value.
[1] https://lore.kernel.org/all/20241108-lvts-v1-1-eee339c6ca20@chromium.org/
Signed-off-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
---
Nícolas F. R. A. Prado (5):
thermal/drivers/mediatek/lvts: Disable monitor mode during suspend
thermal/drivers/mediatek/lvts: Disable Stage 3 thermal threshold
thermal/drivers/mediatek/lvts: Disable low offset IRQ for minimum threshold
thermal/drivers/mediatek/lvts: Start sensor interrupts disabled
thermal/drivers/mediatek/lvts: Only update IRQ enable for valid sensors
drivers/thermal/mediatek/lvts_thermal.c | 103 ++++++++++++++++++++++----------
1 file changed, 72 insertions(+), 31 deletions(-)
---
base-commit: b852e1e7a0389ed6168ef1d38eb0bad71a6b11e8
change-id: 20241121-mt8192-lvts-filtered-suspend-fix-a5032ca8eceb
Best regards,
--
Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
v3:
- Fixes commit log "per which" - Bryan
- Link to v2: https://lore.kernel.org/r/20241125-b4-linux-next-24-11-18-clock-multiple-po…
v2:
The main change in this version is Bjorn's pointing out that pm_runtime_*
inside of the gdsc_enable/gdsc_disable path would be recursive and cause a
lockdep splat. Dmitry alluded to this too.
Bjorn pointed to stuff being done lower in the gdsc_register() routine that
might be a starting point.
I iterated around that idea and came up with patch #3. When a gdsc has no
parent and the pd_list is non-NULL then attach that orphan GDSC to the
clock controller power-domain list.
Existing subdomain code in gdsc_register() will connect the parent GDSCs in
the clock-controller to the clock-controller subdomain, the new code here
does that same job for a list of power-domains the clock controller depends
on.
To Dmitry's point about MMCX and MCX dependencies for the registers inside
of the clock controller, I have switched off all references in a test dtsi
and confirmed that accessing the clock-controller regs themselves isn't
required.
On the second point I also verified my test branch with lockdep on which
was a concern with the pm_domain version of this solution but I wanted to
cover it anyway with the new approach for completeness sake.
Here's the item-by-item list of changes:
- Adds a patch to capture pm_genpd_add_subdomain() result code - Bryan
- Changes changelog of second patch to remove singleton and generally
to make the commit log easier to understand - Bjorn
- Uses demv_pm_domain_attach_list - Vlad
- Changes error check to if (ret < 0 && ret != -EEXIST) - Vlad
- Retains passing &pd_data instead of NULL - because NULL doesn't do
the same thing - Bryan/Vlad
- Retains standalone function qcom_cc_pds_attach() because the pd_data
enumeration looks neater in a standalone function - Bryan/Vlad
- Drops pm_runtime in favour of gdsc_add_subdomain_list() for each
power-domain in the pd_list.
The pd_list will be whatever is pointed to by power-domains = <>
in the dtsi - Bjorn
- Link to v1: https://lore.kernel.org/r/20241118-b4-linux-next-24-11-18-clock-multiple-po…
v1:
On x1e80100 and it's SKUs the Camera Clock Controller - CAMCC has
multiple power-domains which power it. Usually with a single power-domain
the core platform code will automatically switch on the singleton
power-domain for you. If you have multiple power-domains for a device, in
this case the clock controller, you need to switch those power-domains
on/off yourself.
The clock controllers can also contain Global Distributed
Switch Controllers - GDSCs which themselves can be referenced from dtsi
nodes ultimately triggering a gdsc_en() in drivers/clk/qcom/gdsc.c.
As an example:
cci0: cci@ac4a000 {
power-domains = <&camcc TITAN_TOP_GDSC>;
};
This series adds the support to attach a power-domain list to the
clock-controllers and the GDSCs those controllers provide so that in the
case of the above example gdsc_toggle_logic() will trigger the power-domain
list with pm_runtime_resume_and_get() and pm_runtime_put_sync()
respectively.
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
---
Bryan O'Donoghue (3):
clk: qcom: gdsc: Capture pm_genpd_add_subdomain result code
clk: qcom: common: Add support for power-domain attachment
driver: clk: qcom: Support attaching subdomain list to multiple parents
drivers/clk/qcom/common.c | 21 +++++++++++++++++++++
drivers/clk/qcom/gdsc.c | 41 +++++++++++++++++++++++++++++++++++++++--
drivers/clk/qcom/gdsc.h | 1 +
3 files changed, 61 insertions(+), 2 deletions(-)
---
base-commit: 744cf71b8bdfcdd77aaf58395e068b7457634b2c
change-id: 20241118-b4-linux-next-24-11-18-clock-multiple-power-domains-a5f994dc452a
Best regards,
--
Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
From: Chuck Lever <chuck.lever(a)oracle.com>
Testing shows that the EBUSY error return from mtree_alloc_cyclic()
leaks into user space. The ERRORS section of "man creat(2)" says:
> EBUSY O_EXCL was specified in flags and pathname refers
> to a block device that is in use by the system
> (e.g., it is mounted).
ENOSPC is closer to what applications expect in this situation.
Note that the normal range of simple directory offset values is
2..2^63, so hitting this error is going to be rare to impossible.
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
Cc: <stable(a)vger.kernel.org> # v6.9+
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/libfs.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/libfs.c b/fs/libfs.c
index 46966fd8bcf9..bf67954b525b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -288,7 +288,9 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
LONG_MAX, &octx->next_offset, GFP_KERNEL);
- if (ret < 0)
+ if (unlikely(ret == -EBUSY))
+ return -ENOSPC;
+ if (unlikely(ret < 0))
return ret;
offset_set(dentry, offset);
--
2.47.0
XE_CACHE_WB must be converted into the per-platform pat index for that
particular caching mode, otherwise we are just encoding whatever happens
to be the value of that enum.
Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Nirmoy Das <nirmoy.das(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.12+
---
drivers/gpu/drm/xe/xe_migrate.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index cfd31ae49cc1..48e205a40fd2 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1350,6 +1350,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
/* For sysmem PTE's, need to map them in our hole.. */
if (!IS_DGFX(xe)) {
+ u16 pat_index = xe->pat.idx[XE_CACHE_WB];
u32 ptes, ofs;
ppgtt_ofs = NUM_KERNEL_PDE - 1;
@@ -1409,7 +1410,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
pt_bo->update_index = current_update;
addr = vm->pt_ops->pte_encode_bo(pt_bo, 0,
- XE_CACHE_WB, 0);
+ pat_index, 0);
bb->cs[bb->len++] = lower_32_bits(addr);
bb->cs[bb->len++] = upper_32_bits(addr);
}
--
2.47.0
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 12aaf67584cf19dc84615b7aba272fe642c35b8b
Gitweb: https://git.kernel.org/tip/12aaf67584cf19dc84615b7aba272fe642c35b8b
Author: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
AuthorDate: Thu, 21 Nov 2024 12:48:25
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 26 Nov 2024 19:58:27 +01:00
irqchip/irq-mvebu-sei: Move misplaced select() callback to SEI CP domain
Commit fbdf14e90ce4 ("irqchip/irq-mvebu-sei: Switch to MSI parent")
introduced in v6.11-rc1 broke Mavell Armada platforms (and possibly others)
by incorrectly switching irq-mvebu-sei to MSI parent.
In the above commit, msi_parent_ops is set for the sei->cp_domain, but
rather than adding a .select method to mvebu_sei_cp_domain_ops (which is
associated with sei->cp_domain), it was added to mvebu_sei_domain_ops which
is associated with sei->sei_domain, which doesn't have any
msi_parent_ops. This makes the call to msi_lib_irq_domain_select() always
fail.
This bug manifests itself with the following kernel messages on Armada 8040
based systems:
platform f21e0000.interrupt-controller:interrupt-controller@50: deferred probe pending: (reason unknown)
platform f41e0000.interrupt-controller:interrupt-controller@50: deferred probe pending: (reason unknown)
Move the select callback to mvebu_sei_cp_domain_ops to cure it.
Fixes: fbdf14e90ce4 ("irqchip/irq-mvebu-sei: Switch to MSI parent")
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/E1tE6bh-004CmX-QU@rmk-PC.armlinux.org.uk
---
drivers/irqchip/irq-mvebu-sei.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index f8c70f2..065166a 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -192,7 +192,6 @@ static void mvebu_sei_domain_free(struct irq_domain *domain, unsigned int virq,
}
static const struct irq_domain_ops mvebu_sei_domain_ops = {
- .select = msi_lib_irq_domain_select,
.alloc = mvebu_sei_domain_alloc,
.free = mvebu_sei_domain_free,
};
@@ -306,6 +305,7 @@ static void mvebu_sei_cp_domain_free(struct irq_domain *domain,
}
static const struct irq_domain_ops mvebu_sei_cp_domain_ops = {
+ .select = msi_lib_irq_domain_select,
.alloc = mvebu_sei_cp_domain_alloc,
.free = mvebu_sei_cp_domain_free,
};