If the clock tu->ick was not enabled in tahvo_usb_probe,
it may still hold a non-error pointer, potentially causing
the clock to be incorrectly disabled later in the function.
Use the devm_clk_get_enabled helper function to ensure proper call balance
for tu->ick.
Found by Linux Verification Center (linuxtesting.org) with Klever.
Fixes: 9ba96ae5074c ("usb: omap1: Tahvo USB transceiver driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Vitalii Mordan <mordan(a)ispras.ru>
---
v2: Corrected a typo in the error handling of the devm_clk_get_enabled
call. This issue was reported by Dan Carpenter <dan.carpenter(a)linaro.org>.
drivers/usb/phy/phy-tahvo.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c
index ae7bf3ff89ee..4182e86dc450 100644
--- a/drivers/usb/phy/phy-tahvo.c
+++ b/drivers/usb/phy/phy-tahvo.c
@@ -341,9 +341,11 @@ static int tahvo_usb_probe(struct platform_device *pdev)
mutex_init(&tu->serialize);
- tu->ick = devm_clk_get(&pdev->dev, "usb_l4_ick");
- if (!IS_ERR(tu->ick))
- clk_enable(tu->ick);
+ tu->ick = devm_clk_get_enabled(&pdev->dev, "usb_l4_ick");
+ if (IS_ERR(tu->ick)) {
+ dev_err(&pdev->dev, "failed to get and enable clock\n");
+ return PTR_ERR(tu->ick);
+ }
/*
* Set initial state, so that we generate kevents only on state changes.
@@ -353,15 +355,14 @@ static int tahvo_usb_probe(struct platform_device *pdev)
tu->extcon = devm_extcon_dev_allocate(&pdev->dev, tahvo_cable);
if (IS_ERR(tu->extcon)) {
dev_err(&pdev->dev, "failed to allocate memory for extcon\n");
- ret = PTR_ERR(tu->extcon);
- goto err_disable_clk;
+ return PTR_ERR(tu->extcon);
}
ret = devm_extcon_dev_register(&pdev->dev, tu->extcon);
if (ret) {
dev_err(&pdev->dev, "could not register extcon device: %d\n",
ret);
- goto err_disable_clk;
+ return ret;
}
/* Set the initial cable state. */
@@ -384,7 +385,7 @@ static int tahvo_usb_probe(struct platform_device *pdev)
if (ret < 0) {
dev_err(&pdev->dev, "cannot register USB transceiver: %d\n",
ret);
- goto err_disable_clk;
+ return ret;
}
dev_set_drvdata(&pdev->dev, tu);
@@ -405,9 +406,6 @@ static int tahvo_usb_probe(struct platform_device *pdev)
err_remove_phy:
usb_remove_phy(&tu->phy);
-err_disable_clk:
- if (!IS_ERR(tu->ick))
- clk_disable(tu->ick);
return ret;
}
@@ -418,8 +416,6 @@ static void tahvo_usb_remove(struct platform_device *pdev)
free_irq(tu->irq, tu);
usb_remove_phy(&tu->phy);
- if (!IS_ERR(tu->ick))
- clk_disable(tu->ick);
}
static struct platform_driver tahvo_usb_driver = {
--
2.25.1
If the clock tu->ick was not enabled in tahvo_usb_probe,
it may still hold a non-error pointer, potentially causing
the clock to be incorrectly disabled later in the function.
Use the devm_clk_get_enabled helper function to ensure proper call balance
for tu->ick.
Found by Linux Verification Center (linuxtesting.org) with Klever.
Fixes: 9ba96ae5074c ("usb: omap1: Tahvo USB transceiver driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Vitalii Mordan <mordan(a)ispras.ru>
---
drivers/usb/phy/phy-tahvo.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c
index ae7bf3ff89ee..d393308d23d4 100644
--- a/drivers/usb/phy/phy-tahvo.c
+++ b/drivers/usb/phy/phy-tahvo.c
@@ -341,9 +341,11 @@ static int tahvo_usb_probe(struct platform_device *pdev)
mutex_init(&tu->serialize);
- tu->ick = devm_clk_get(&pdev->dev, "usb_l4_ick");
- if (!IS_ERR(tu->ick))
- clk_enable(tu->ick);
+ tu->ick = devm_clk_get_enabled(&pdev->dev, "usb_l4_ick");
+ if (!IS_ERR(tu->ick)) {
+ dev_err(&pdev->dev, "failed to get and enable clock\n");
+ return PTR_ERR(tu->ick);
+ }
/*
* Set initial state, so that we generate kevents only on state changes.
@@ -353,15 +355,14 @@ static int tahvo_usb_probe(struct platform_device *pdev)
tu->extcon = devm_extcon_dev_allocate(&pdev->dev, tahvo_cable);
if (IS_ERR(tu->extcon)) {
dev_err(&pdev->dev, "failed to allocate memory for extcon\n");
- ret = PTR_ERR(tu->extcon);
- goto err_disable_clk;
+ return PTR_ERR(tu->extcon);
}
ret = devm_extcon_dev_register(&pdev->dev, tu->extcon);
if (ret) {
dev_err(&pdev->dev, "could not register extcon device: %d\n",
ret);
- goto err_disable_clk;
+ return ret;
}
/* Set the initial cable state. */
@@ -384,7 +385,7 @@ static int tahvo_usb_probe(struct platform_device *pdev)
if (ret < 0) {
dev_err(&pdev->dev, "cannot register USB transceiver: %d\n",
ret);
- goto err_disable_clk;
+ return ret;
}
dev_set_drvdata(&pdev->dev, tu);
@@ -405,9 +406,6 @@ static int tahvo_usb_probe(struct platform_device *pdev)
err_remove_phy:
usb_remove_phy(&tu->phy);
-err_disable_clk:
- if (!IS_ERR(tu->ick))
- clk_disable(tu->ick);
return ret;
}
@@ -418,8 +416,6 @@ static void tahvo_usb_remove(struct platform_device *pdev)
free_irq(tu->irq, tu);
usb_remove_phy(&tu->phy);
- if (!IS_ERR(tu->ick))
- clk_disable(tu->ick);
}
static struct platform_driver tahvo_usb_driver = {
--
2.25.1
From: Chuck Lever <chuck.lever(a)oracle.com>
Testing shows that the EBUSY error return from mtree_alloc_cyclic()
leaks into user space. The ERRORS section of "man creat(2)" says:
> EBUSY O_EXCL was specified in flags and pathname refers
> to a block device that is in use by the system
> (e.g., it is mounted).
ENOSPC is closer to what applications expect in this situation.
Note that the normal range of simple directory offset values is
2..2^63, so hitting this error is going to be rare to impossible.
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
Cc: <stable(a)vger.kernel.org> # v6.9+
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Reviewed-by: Yang Erkun <yangerkun(a)huawei.com>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/libfs.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/libfs.c b/fs/libfs.c
index 748ac5923154..f6d04c69f195 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -292,7 +292,9 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
LONG_MAX, &octx->next_offset, GFP_KERNEL);
- if (ret < 0)
+ if (unlikely(ret == -EBUSY))
+ return -ENOSPC;
+ if (unlikely(ret < 0))
return ret;
offset_set(dentry, offset);
--
2.47.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 978c4486cca5c7b9253d3ab98a88c8e769cb9bbd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024121506-pancreas-mosaic-0ae0@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 978c4486cca5c7b9253d3ab98a88c8e769cb9bbd Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa(a)kernel.org>
Date: Sun, 8 Dec 2024 15:25:07 +0100
Subject: [PATCH] bpf,perf: Fix invalid prog_array access in
perf_event_detach_bpf_prog
Syzbot reported [1] crash that happens for following tracing scenario:
- create tracepoint perf event with attr.inherit=1, attach it to the
process and set bpf program to it
- attached process forks -> chid creates inherited event
the new child event shares the parent's bpf program and tp_event
(hence prog_array) which is global for tracepoint
- exit both process and its child -> release both events
- first perf_event_detach_bpf_prog call will release tp_event->prog_array
and second perf_event_detach_bpf_prog will crash, because
tp_event->prog_array is NULL
The fix makes sure the perf_event_detach_bpf_prog checks prog_array
is valid before it tries to remove the bpf program from it.
[1] https://lore.kernel.org/bpf/Z1MR6dCIKajNS6nU@krava/T/#m91dbf0688221ec7a7fc9…
Fixes: 0ee288e69d03 ("bpf,perf: Fix perf_event_detach_bpf_prog error handling")
Reported-by: syzbot+2e0d2840414ce817aaac(a)syzkaller.appspotmail.com
Signed-off-by: Jiri Olsa <jolsa(a)kernel.org>
Signed-off-by: Andrii Nakryiko <andrii(a)kernel.org>
Link: https://lore.kernel.org/bpf/20241208142507.1207698-1-jolsa@kernel.org
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a403b05a7091..1b8db5aee9d3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2250,6 +2250,9 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
goto unlock;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ if (!old_array)
+ goto put;
+
ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
if (ret < 0) {
bpf_prog_array_delete_safe(old_array, event->prog);
@@ -2258,6 +2261,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
bpf_prog_array_free_sleepable(old_array);
}
+put:
/*
* It could be that the bpf_prog is not sleepable (and will be freed
* via normal RCU), but is called from a point that supports sleepable
From: yangge <yangge1116(a)126.com>
Since commit 984fdba6a32e ("mm, compaction: use proper alloc_flags
in __compaction_suitable()") allow compaction to proceed when free
pages required for compaction reside in the CMA pageblocks, it's
possible that __compaction_suitable() always returns true, and in
some cases, it's not acceptable.
There are 4 NUMA nodes on my machine, and each NUMA node has 32GB
of memory. I have configured 16GB of CMA memory on each NUMA node,
and starting a 32GB virtual machine with device passthrough is
extremely slow, taking almost an hour.
During the start-up of the virtual machine, it will call
pin_user_pages_remote(..., FOLL_LONGTERM, ...) to allocate memory.
Long term GUP cannot allocate memory from CMA area, so a maximum
of 16 GB of no-CMA memory on a NUMA node can be used as virtual
machine memory. Since there is 16G of free CMA memory on the NUMA
node, watermark for order-0 always be met for compaction, so
__compaction_suitable() always returns true, even if the node is
unable to allocate non-CMA memory for the virtual machine.
For costly allocations, because __compaction_suitable() always
returns true, __alloc_pages_slowpath() can't exit at the appropriate
place, resulting in excessively long virtual machine startup times.
Call trace:
__alloc_pages_slowpath
if (compact_result == COMPACT_SKIPPED ||
compact_result == COMPACT_DEFERRED)
goto nopage; // should exit __alloc_pages_slowpath() from here
In order to quickly fall back to remote node, we should remove
ALLOC_CMA both in __compaction_suitable() and __isolate_free_page()
in long term GUP flow. After this fix, starting a 32GB virtual machine
with device passthrough takes only a few seconds.
Fixes: 984fdba6a32e ("mm, compaction: use proper alloc_flags in __compaction_suitable()")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: yangge <yangge1116(a)126.com>
---
V4:
- rich the commit log description
V3:
- fix build errors
- add ALLOC_CMA both in should_continue_reclaim() and compaction_ready()
V2:
- using the 'cc->alloc_flags' to determin if 'ALLOC_CMA' is needed
- rich the commit log description
include/linux/compaction.h | 6 ++++--
mm/compaction.c | 18 +++++++++++-------
mm/page_alloc.c | 4 +++-
mm/vmscan.c | 4 ++--
4 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index e947764..b4c3ac3 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -90,7 +90,8 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
struct page **page);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern bool compaction_suitable(struct zone *zone, int order,
- int highest_zoneidx);
+ int highest_zoneidx,
+ unsigned int alloc_flags);
extern void compaction_defer_reset(struct zone *zone, int order,
bool alloc_success);
@@ -108,7 +109,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
}
static inline bool compaction_suitable(struct zone *zone, int order,
- int highest_zoneidx)
+ int highest_zoneidx,
+ unsigned int alloc_flags)
{
return false;
}
diff --git a/mm/compaction.c b/mm/compaction.c
index 07bd227..585f5ab 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2381,9 +2381,11 @@ static enum compact_result compact_finished(struct compact_control *cc)
static bool __compaction_suitable(struct zone *zone, int order,
int highest_zoneidx,
+ unsigned int alloc_flags,
unsigned long wmark_target)
{
unsigned long watermark;
+ bool use_cma;
/*
* Watermarks for order-0 must be met for compaction to be able to
* isolate free pages for migration targets. This means that the
@@ -2395,25 +2397,27 @@ static bool __compaction_suitable(struct zone *zone, int order,
* even if compaction succeeds.
* For costly orders, we require low watermark instead of min for
* compaction to proceed to increase its chances.
- * ALLOC_CMA is used, as pages in CMA pageblocks are considered
- * suitable migration targets
+ * In addition to long term GUP flow, ALLOC_CMA is used, as pages in
+ * CMA pageblocks are considered suitable migration targets
*/
watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
low_wmark_pages(zone) : min_wmark_pages(zone);
watermark += compact_gap(order);
+ use_cma = !!(alloc_flags & ALLOC_CMA);
return __zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
- ALLOC_CMA, wmark_target);
+ use_cma ? ALLOC_CMA : 0, wmark_target);
}
/*
* compaction_suitable: Is this suitable to run compaction on this zone now?
*/
-bool compaction_suitable(struct zone *zone, int order, int highest_zoneidx)
+bool compaction_suitable(struct zone *zone, int order, int highest_zoneidx,
+ unsigned int alloc_flags)
{
enum compact_result compact_result;
bool suitable;
- suitable = __compaction_suitable(zone, order, highest_zoneidx,
+ suitable = __compaction_suitable(zone, order, highest_zoneidx, alloc_flags,
zone_page_state(zone, NR_FREE_PAGES));
/*
* fragmentation index determines if allocation failures are due to
@@ -2474,7 +2478,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
available = zone_reclaimable_pages(zone) / order;
available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
if (__compaction_suitable(zone, order, ac->highest_zoneidx,
- available))
+ alloc_flags, available))
return true;
}
@@ -2499,7 +2503,7 @@ compaction_suit_allocation_order(struct zone *zone, unsigned int order,
alloc_flags))
return COMPACT_SUCCESS;
- if (!compaction_suitable(zone, order, highest_zoneidx))
+ if (!compaction_suitable(zone, order, highest_zoneidx, alloc_flags))
return COMPACT_SKIPPED;
return COMPACT_CONTINUE;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dde19db..9a5dfda 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2813,6 +2813,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
{
struct zone *zone = page_zone(page);
int mt = get_pageblock_migratetype(page);
+ bool pin;
if (!is_migrate_isolate(mt)) {
unsigned long watermark;
@@ -2823,7 +2824,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
* exists.
*/
watermark = zone->_watermark[WMARK_MIN] + (1UL << order);
- if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+ pin = !!(current->flags & PF_MEMALLOC_PIN);
+ if (!zone_watermark_ok(zone, 0, watermark, 0, pin ? 0 : ALLOC_CMA))
return 0;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5e03a61..33f5b46 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5815,7 +5815,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
sc->reclaim_idx, 0))
return false;
- if (compaction_suitable(zone, sc->order, sc->reclaim_idx))
+ if (compaction_suitable(zone, sc->order, sc->reclaim_idx, ALLOC_CMA))
return false;
}
@@ -6043,7 +6043,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
return true;
/* Compaction cannot yet proceed. Do reclaim. */
- if (!compaction_suitable(zone, sc->order, sc->reclaim_idx))
+ if (!compaction_suitable(zone, sc->order, sc->reclaim_idx, ALLOC_CMA))
return false;
/*
--
2.7.4
Ensure a non-interruptible wait is used when moving a bo to
XE_PL_SYSTEM. This prevents dma_mappings from being removed prematurely
while a GPU job is still in progress, even if the CPU receives a
signal during the operation.
Fixes: 75521e8b56e8 ("drm/xe: Perform dma_map when moving system buffer objects to TT")
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.11+
Suggested-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das(a)intel.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
---
drivers/gpu/drm/xe/xe_bo.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 283cd0294570..06931df876ab 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -733,7 +733,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
new_mem->mem_type == XE_PL_SYSTEM) {
long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP,
- true,
+ false,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0) {
ret = timeout;
--
2.46.0
This change is specific to Hyper-V VM user.
If the Virtual Machine Connection window is focused,
a Hyper-V VM user can unintentionally touch the keyboard/mouse
when the VM is hibernating or resuming, and consequently the
hibernation or resume operation can be aborted unexpectedly.
Fix the issue by no longer registering the keyboard/mouse as
wakeup devices (see the other two patches for the
changes to drivers/input/serio/hyperv-keyboard.c and
drivers/hid/hid-hyperv.c).
The keyboard/mouse were registered as wakeup devices because the
VM needs to be woken up from the Suspend-to-Idle state after
a user runs "echo freeze > /sys/power/state". It seems like
the Suspend-to-Idle feature has no real users in practice, so
let's no longer support that by returning -EOPNOTSUPP if a
user tries to use that.
Fixes: 1a06d017fb3f ("Drivers: hv: vmbus: Fix Suspend-to-Idle for Generation-2 VM")
Cc: stable(a)vger.kernel.org
Signed-off-by: Saurabh Sengar <ssengar(a)linux.microsoft.com>
Signed-off-by: Erni Sri Satya Vennela <ernis(a)linux.microsoft.com>>
---
Changes in v4:
* No change
Changes in v3:
* Add "Cc: stable(a)vger.kernel.org" in sign-off area.
Changes in v2:
* Add "#define vmbus_freeze NULL" when CONFIG_PM_SLEEP is not
enabled.
---
drivers/hv/vmbus_drv.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6d89d37b069a..4df6b12bf6a1 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -900,6 +900,19 @@ static void vmbus_shutdown(struct device *child_device)
}
#ifdef CONFIG_PM_SLEEP
+/*
+ * vmbus_freeze - Suspend-to-Idle
+ */
+static int vmbus_freeze(struct device *child_device)
+{
+/*
+ * Do not support Suspend-to-Idle ("echo freeze > /sys/power/state") as
+ * that would require registering the Hyper-V synthetic mouse/keyboard
+ * devices as wakeup devices, which can abort hibernation/resume unexpectedly.
+ */
+ return -EOPNOTSUPP;
+}
+
/*
* vmbus_suspend - Suspend a vmbus device
*/
@@ -938,6 +951,7 @@ static int vmbus_resume(struct device *child_device)
return drv->resume(dev);
}
#else
+#define vmbus_freeze NULL
#define vmbus_suspend NULL
#define vmbus_resume NULL
#endif /* CONFIG_PM_SLEEP */
@@ -969,7 +983,7 @@ static void vmbus_device_release(struct device *device)
*/
static const struct dev_pm_ops vmbus_pm = {
- .suspend_noirq = NULL,
+ .suspend_noirq = vmbus_freeze,
.resume_noirq = NULL,
.freeze_noirq = vmbus_suspend,
.thaw_noirq = vmbus_resume,
--
2.34.1