If devicetree describes power supplies related to a PCI device, we
previously created a pwrctrl device even if CONFIG_PCI_PWRCTL was
not enabled.
When pci_pwrctrl_create_device() creates and returns a pwrctrl device,
pci_scan_device() doesn't enumerate the PCI device. It assumes the pwrctrl
core will rescan the bus after turning on the power. However, if
CONFIG_PCI_PWRCTL is not enabled, the rescan never happens.
This may break PCI enumeration on any system that describes power supplies
in devicetree but does not use pwrctrl. Jim reported that some brcmstb
platforms break this way.
While the actual fix would be to convert all the platforms to use pwrctrl
framework, we also need to skip creating the pwrctrl device if
CONFIG_PCI_PWRCTL is not enabled and let the PCI core scan the device
normally (assuming it is already powered on or by the controller driver).
Cc: stable(a)vger.kernel.org # 6.15
Fixes: 957f40d039a9 ("PCI/pwrctrl: Move creation of pwrctrl devices to pci_scan_device()")
Reported-by: Jim Quinlan <james.quinlan(a)broadcom.com>
Closes: https://lore.kernel.org/r/CA+-6iNwgaByXEYD3j=-+H_PKAxXRU78svPMRHDKKci8AGXAU…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
---
Changes in v2:
* Used the stub instead of returning NULL inside the function
drivers/pci/probe.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4b8693ec9e4c..e6a34db77826 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2508,6 +2508,7 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
}
EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
+#if IS_ENABLED(CONFIG_PCI_PWRCTRL)
static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn)
{
struct pci_host_bridge *host = pci_find_host_bridge(bus);
@@ -2537,6 +2538,12 @@ static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, in
return pdev;
}
+#else
+static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn)
+{
+ return NULL;
+}
+#endif
/*
* Read the config data for a PCI device, sanity-check it,
--
2.43.0
Make sure to drop the reference to the pwrctrl device taken by
of_find_device_by_node() when registering a PCI device.
Fixes: b458ff7e8176 ("PCI/pwrctl: Ensure that pwrctl drivers are probed before PCI client drivers")
Cc: stable(a)vger.kernel.org # 6.13
Cc: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/pci/bus.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 69048869ef1c..0394a9c77b38 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -362,11 +362,15 @@ void pci_bus_add_device(struct pci_dev *dev)
* before PCI client drivers.
*/
pdev = of_find_device_by_node(dn);
- if (pdev && of_pci_supply_present(dn)) {
- if (!device_link_add(&dev->dev, &pdev->dev,
- DL_FLAG_AUTOREMOVE_CONSUMER))
- pci_err(dev, "failed to add device link to power control device %s\n",
- pdev->name);
+ if (pdev) {
+ if (of_pci_supply_present(dn)) {
+ if (!device_link_add(&dev->dev, &pdev->dev,
+ DL_FLAG_AUTOREMOVE_CONSUMER)) {
+ pci_err(dev, "failed to add device link to power control device %s\n",
+ pdev->name);
+ }
+ }
+ put_device(&pdev->dev);
}
if (!dn || of_device_is_available(dn))
--
2.49.1
Using device_find_child() to locate a probed virtual-device-port node
causes a device refcount imbalance, as device_find_child() internally
calls get_device() to increment the device’s reference count before
returning its pointer. vdc_port_mpgroup_check() directly returns true
upon finding a matching device without releasing the reference via
put_device(). We should call put_device() to decrement refcount.
As comment of device_find_child() says, 'NOTE: you will need to drop
the reference with put_device() after use'.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 3ee70591d6c4 ("sunvdc: prevent sunvdc panic when mpgroup disk added to guest domain")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- keep the change style simple as suggestions.
---
drivers/block/sunvdc.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index b5727dea15bd..7af21fe67671 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -957,8 +957,10 @@ static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
dev = device_find_child(vdev->dev.parent, &port_data,
vdc_device_probed);
- if (dev)
+ if (dev) {
+ put_device(dev);
return true;
+ }
return false;
}
--
2.25.1
syzbot reports a use-after-free in comedi in the below link, which is
due to comedi gladly removing the allocated async area even though poll
requests are still active on the wait_queue_head inside of it. This can
cause a use-after-free when the poll entries are later triggered or
removed, as the memory for the wait_queue_head has been freed. We need
to check there are no tasks queued on any of the subdevices' wait queues
before allowing the device to be detached by the `COMEDI_DEVCONFIG`
ioctl.
Tasks will read-lock `dev->attach_lock` before adding themselves to the
subdevice wait queue, so fix the problem in the `COMEDI_DEVCONFIG` ioctl
handler by write-locking `dev->attach_lock` before checking that all of
the subdevices are safe to be deleted. This includes testing for any
sleepers on the subdevices' wait queues. It remains locked until the
device has been detached. This requires the `comedi_device_detach()`
function to be refactored slightly, moving the bulk of it into new
function `comedi_device_detach_locked()`.
Note that the refactor of `comedi_device_detach()` results in
`comedi_device_cancel_all()` now being called while `dev->attach_lock`
is write-locked, which wasn't the case previously, but that does not
matter.
Thanks to Jens Axboe for diagnosing the problem and co-developing this
patch.
Cc: <stable(a)vger.kernel.org> # 5.13+
Fixes: 2f3fdcd7ce93 ("staging: comedi: add rw_semaphore to protect against device detachment")
Link: https://lore.kernel.org/all/687bd5fe.a70a0220.693ce.0091.GAE@google.com/
Reported-by: syzbot+01523a0ae5600aef5895(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=01523a0ae5600aef5895
Co-developed-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
---
Patch does not apply cleanly to longterm release series 5.10 and 5.4.
---
drivers/comedi/comedi_fops.c | 31 ++++++++++++++++++++++++-------
drivers/comedi/comedi_internal.h | 1 +
drivers/comedi/drivers.c | 13 ++++++++++---
3 files changed, 35 insertions(+), 10 deletions(-)
diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c
index 3383a7ce27ff..3007fe946e42 100644
--- a/drivers/comedi/comedi_fops.c
+++ b/drivers/comedi/comedi_fops.c
@@ -787,6 +787,7 @@ static int is_device_busy(struct comedi_device *dev)
struct comedi_subdevice *s;
int i;
+ lockdep_assert_held_write(&dev->attach_lock);
lockdep_assert_held(&dev->mutex);
if (!dev->attached)
return 0;
@@ -795,7 +796,16 @@ static int is_device_busy(struct comedi_device *dev)
s = &dev->subdevices[i];
if (s->busy)
return 1;
- if (s->async && comedi_buf_is_mmapped(s))
+ if (!s->async)
+ continue;
+ if (comedi_buf_is_mmapped(s))
+ return 1;
+ /*
+ * There may be tasks still waiting on the subdevice's wait
+ * queue, although they should already be about to be removed
+ * from it since the subdevice has no active async command.
+ */
+ if (wq_has_sleeper(&s->async->wait_head))
return 1;
}
@@ -825,15 +835,22 @@ static int do_devconfig_ioctl(struct comedi_device *dev,
return -EPERM;
if (!arg) {
- if (is_device_busy(dev))
- return -EBUSY;
+ int rc = 0;
+
if (dev->attached) {
- struct module *driver_module = dev->driver->module;
+ down_write(&dev->attach_lock);
+ if (is_device_busy(dev)) {
+ rc = -EBUSY;
+ } else {
+ struct module *driver_module =
+ dev->driver->module;
- comedi_device_detach(dev);
- module_put(driver_module);
+ comedi_device_detach_locked(dev);
+ module_put(driver_module);
+ }
+ up_write(&dev->attach_lock);
}
- return 0;
+ return rc;
}
if (copy_from_user(&it, arg, sizeof(it)))
diff --git a/drivers/comedi/comedi_internal.h b/drivers/comedi/comedi_internal.h
index 9b3631a654c8..cf10ba016ebc 100644
--- a/drivers/comedi/comedi_internal.h
+++ b/drivers/comedi/comedi_internal.h
@@ -50,6 +50,7 @@ extern struct mutex comedi_drivers_list_lock;
int insn_inval(struct comedi_device *dev, struct comedi_subdevice *s,
struct comedi_insn *insn, unsigned int *data);
+void comedi_device_detach_locked(struct comedi_device *dev);
void comedi_device_detach(struct comedi_device *dev);
int comedi_device_attach(struct comedi_device *dev,
struct comedi_devconfig *it);
diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c
index 376130bfba8a..f5c2ee271d0e 100644
--- a/drivers/comedi/drivers.c
+++ b/drivers/comedi/drivers.c
@@ -158,7 +158,7 @@ static void comedi_device_detach_cleanup(struct comedi_device *dev)
int i;
struct comedi_subdevice *s;
- lockdep_assert_held(&dev->attach_lock);
+ lockdep_assert_held_write(&dev->attach_lock);
lockdep_assert_held(&dev->mutex);
if (dev->subdevices) {
for (i = 0; i < dev->n_subdevices; i++) {
@@ -196,16 +196,23 @@ static void comedi_device_detach_cleanup(struct comedi_device *dev)
comedi_clear_hw_dev(dev);
}
-void comedi_device_detach(struct comedi_device *dev)
+void comedi_device_detach_locked(struct comedi_device *dev)
{
+ lockdep_assert_held_write(&dev->attach_lock);
lockdep_assert_held(&dev->mutex);
comedi_device_cancel_all(dev);
- down_write(&dev->attach_lock);
dev->attached = false;
dev->detach_count++;
if (dev->driver)
dev->driver->detach(dev);
comedi_device_detach_cleanup(dev);
+}
+
+void comedi_device_detach(struct comedi_device *dev)
+{
+ lockdep_assert_held(&dev->mutex);
+ down_write(&dev->attach_lock);
+ comedi_device_detach_locked(dev);
up_write(&dev->attach_lock);
}
--
2.47.2
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b5e8acc14dcb314a9b61ff19dcd9fdd0d88f70df
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072113-perjury-dynamite-23ba@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5e8acc14dcb314a9b61ff19dcd9fdd0d88f70df Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Fri, 18 Jul 2025 22:31:58 -0400
Subject: [PATCH] tracing: Add down_write(trace_event_sem) when adding trace
event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a module is loaded, it adds trace events defined by the module. It
may also need to modify the modules trace printk formats to replace enum
names with their values.
If two modules are loaded at the same time, the adding of the event to the
ftrace_events list can corrupt the walking of the list in the code that is
modifying the printk format strings and crash the kernel.
The addition of the event should take the trace_event_sem for write while
it adds the new event.
Also add a lockdep_assert_held() on that semaphore in
__trace_add_event_dirs() as it iterates the list.
Cc: stable(a)vger.kernel.org
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Link: https://lore.kernel.org/20250718223158.799bfc0c@batman.local.home
Reported-by: Fusheng Huang(黄富生) <Fusheng.Huang(a)luxshare-ict.com>
Closes: https://lore.kernel.org/all/20250717105007.46ccd18f@batman.local.home/
Fixes: 110bf2b764eb6 ("tracing: add protection around module events unload")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 120531268abf..d01e5c910ce1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3136,7 +3136,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
if (ret < 0)
return ret;
+ down_write(&trace_event_sem);
list_add(&call->list, &ftrace_events);
+ up_write(&trace_event_sem);
+
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
atomic_set(&call->refcnt, 0);
else
@@ -3750,6 +3753,8 @@ __trace_add_event_dirs(struct trace_array *tr)
struct trace_event_call *call;
int ret;
+ lockdep_assert_held(&trace_event_sem);
+
list_for_each_entry(call, &ftrace_events, list) {
ret = __trace_add_new_event(call, tr);
if (ret < 0)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b5e8acc14dcb314a9b61ff19dcd9fdd0d88f70df
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072113-nutty-other-f178@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5e8acc14dcb314a9b61ff19dcd9fdd0d88f70df Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Fri, 18 Jul 2025 22:31:58 -0400
Subject: [PATCH] tracing: Add down_write(trace_event_sem) when adding trace
event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a module is loaded, it adds trace events defined by the module. It
may also need to modify the modules trace printk formats to replace enum
names with their values.
If two modules are loaded at the same time, the adding of the event to the
ftrace_events list can corrupt the walking of the list in the code that is
modifying the printk format strings and crash the kernel.
The addition of the event should take the trace_event_sem for write while
it adds the new event.
Also add a lockdep_assert_held() on that semaphore in
__trace_add_event_dirs() as it iterates the list.
Cc: stable(a)vger.kernel.org
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Link: https://lore.kernel.org/20250718223158.799bfc0c@batman.local.home
Reported-by: Fusheng Huang(黄富生) <Fusheng.Huang(a)luxshare-ict.com>
Closes: https://lore.kernel.org/all/20250717105007.46ccd18f@batman.local.home/
Fixes: 110bf2b764eb6 ("tracing: add protection around module events unload")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 120531268abf..d01e5c910ce1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3136,7 +3136,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
if (ret < 0)
return ret;
+ down_write(&trace_event_sem);
list_add(&call->list, &ftrace_events);
+ up_write(&trace_event_sem);
+
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
atomic_set(&call->refcnt, 0);
else
@@ -3750,6 +3753,8 @@ __trace_add_event_dirs(struct trace_array *tr)
struct trace_event_call *call;
int ret;
+ lockdep_assert_held(&trace_event_sem);
+
list_for_each_entry(call, &ftrace_events, list) {
ret = __trace_add_new_event(call, tr);
if (ret < 0)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 500ba33284416255b9a5b50ace24470b6fe77ea5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072114-matriarch-decline-2598@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 500ba33284416255b9a5b50ace24470b6fe77ea5 Mon Sep 17 00:00:00 2001
From: Maulik Shah <maulik.shah(a)oss.qualcomm.com>
Date: Wed, 9 Jul 2025 14:00:11 +0530
Subject: [PATCH] pmdomain: governor: Consider CPU latency tolerance from
pm_domain_cpu_gov
pm_domain_cpu_gov is selecting a cluster idle state but does not consider
latency tolerance of child CPUs. This results in deeper cluster idle state
whose latency does not meet latency tolerance requirement.
Select deeper idle state only if global and device latency tolerance of all
child CPUs meet.
Test results on SM8750 with 300 usec PM-QoS on CPU0 which is less than
domain idle state entry (2150) + exit (1983) usec latency mentioned in
devicetree, demonstrate the issue.
# echo 300 > /sys/devices/system/cpu/cpu0/power/pm_qos_resume_latency_us
Before: (Usage is incrementing)
======
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 29817 537 8 270 0
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 30348 542 8 271 0
After: (Usage is not incrementing due to latency tolerance)
======
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 39319 626 14 307 0
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 39319 626 14 307 0
Signed-off-by: Maulik Shah <maulik.shah(a)oss.qualcomm.com>
Fixes: e94999688e3a ("PM / Domains: Add genpd governor for CPUs")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250709-pmdomain_qos-v2-1-976b12257899@oss.qualc…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
index c1e148657c87..39359811a930 100644
--- a/drivers/pmdomain/governor.c
+++ b/drivers/pmdomain/governor.c
@@ -8,6 +8,7 @@
#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
#include <linux/hrtimer.h>
+#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/ktime.h>
@@ -349,6 +350,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
struct cpuidle_device *dev;
ktime_t domain_wakeup, next_hrtimer;
ktime_t now = ktime_get();
+ struct device *cpu_dev;
+ s64 cpu_constraint, global_constraint;
s64 idle_duration_ns;
int cpu, i;
@@ -359,6 +362,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
return true;
+ global_constraint = cpu_latency_qos_limit();
/*
* Find the next wakeup for any of the online CPUs within the PM domain
* and its subdomains. Note, we only need the genpd->cpus, as it already
@@ -372,8 +376,16 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (ktime_before(next_hrtimer, domain_wakeup))
domain_wakeup = next_hrtimer;
}
+
+ cpu_dev = get_cpu_device(cpu);
+ if (cpu_dev) {
+ cpu_constraint = dev_pm_qos_raw_resume_latency(cpu_dev);
+ if (cpu_constraint < global_constraint)
+ global_constraint = cpu_constraint;
+ }
}
+ global_constraint *= NSEC_PER_USEC;
/* The minimum idle duration is from now - until the next wakeup. */
idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now));
if (idle_duration_ns <= 0)
@@ -389,8 +401,10 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
*/
i = genpd->state_idx;
do {
- if (idle_duration_ns >= (genpd->states[i].residency_ns +
- genpd->states[i].power_off_latency_ns)) {
+ if ((idle_duration_ns >= (genpd->states[i].residency_ns +
+ genpd->states[i].power_off_latency_ns)) &&
+ (global_constraint >= (genpd->states[i].power_on_latency_ns +
+ genpd->states[i].power_off_latency_ns))) {
genpd->state_idx = i;
genpd->gd->last_enter = now;
genpd->gd->reflect_residency = true;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 500ba33284416255b9a5b50ace24470b6fe77ea5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072113-grill-thimble-5d5c@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 500ba33284416255b9a5b50ace24470b6fe77ea5 Mon Sep 17 00:00:00 2001
From: Maulik Shah <maulik.shah(a)oss.qualcomm.com>
Date: Wed, 9 Jul 2025 14:00:11 +0530
Subject: [PATCH] pmdomain: governor: Consider CPU latency tolerance from
pm_domain_cpu_gov
pm_domain_cpu_gov is selecting a cluster idle state but does not consider
latency tolerance of child CPUs. This results in deeper cluster idle state
whose latency does not meet latency tolerance requirement.
Select deeper idle state only if global and device latency tolerance of all
child CPUs meet.
Test results on SM8750 with 300 usec PM-QoS on CPU0 which is less than
domain idle state entry (2150) + exit (1983) usec latency mentioned in
devicetree, demonstrate the issue.
# echo 300 > /sys/devices/system/cpu/cpu0/power/pm_qos_resume_latency_us
Before: (Usage is incrementing)
======
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 29817 537 8 270 0
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 30348 542 8 271 0
After: (Usage is not incrementing due to latency tolerance)
======
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 39319 626 14 307 0
# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
State Time Spent(ms) Usage Rejected Above Below
S0 39319 626 14 307 0
Signed-off-by: Maulik Shah <maulik.shah(a)oss.qualcomm.com>
Fixes: e94999688e3a ("PM / Domains: Add genpd governor for CPUs")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250709-pmdomain_qos-v2-1-976b12257899@oss.qualc…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
index c1e148657c87..39359811a930 100644
--- a/drivers/pmdomain/governor.c
+++ b/drivers/pmdomain/governor.c
@@ -8,6 +8,7 @@
#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
#include <linux/hrtimer.h>
+#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/ktime.h>
@@ -349,6 +350,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
struct cpuidle_device *dev;
ktime_t domain_wakeup, next_hrtimer;
ktime_t now = ktime_get();
+ struct device *cpu_dev;
+ s64 cpu_constraint, global_constraint;
s64 idle_duration_ns;
int cpu, i;
@@ -359,6 +362,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
return true;
+ global_constraint = cpu_latency_qos_limit();
/*
* Find the next wakeup for any of the online CPUs within the PM domain
* and its subdomains. Note, we only need the genpd->cpus, as it already
@@ -372,8 +376,16 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (ktime_before(next_hrtimer, domain_wakeup))
domain_wakeup = next_hrtimer;
}
+
+ cpu_dev = get_cpu_device(cpu);
+ if (cpu_dev) {
+ cpu_constraint = dev_pm_qos_raw_resume_latency(cpu_dev);
+ if (cpu_constraint < global_constraint)
+ global_constraint = cpu_constraint;
+ }
}
+ global_constraint *= NSEC_PER_USEC;
/* The minimum idle duration is from now - until the next wakeup. */
idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now));
if (idle_duration_ns <= 0)
@@ -389,8 +401,10 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
*/
i = genpd->state_idx;
do {
- if (idle_duration_ns >= (genpd->states[i].residency_ns +
- genpd->states[i].power_off_latency_ns)) {
+ if ((idle_duration_ns >= (genpd->states[i].residency_ns +
+ genpd->states[i].power_off_latency_ns)) &&
+ (global_constraint >= (genpd->states[i].power_on_latency_ns +
+ genpd->states[i].power_off_latency_ns))) {
genpd->state_idx = i;
genpd->gd->last_enter = now;
genpd->gd->reflect_residency = true;