Hi,
On 6 June 2013 12:37, Lukasz Majewski <l.majewski(a)samsung.com> wrote:
> This commit adds support for software based frequency boosting.
> Exynos4 SoCs (e.g. 4x12) allow setting of frequency above its normal
> condition limits. This can be done for some short time.
>
> Overclocking (boost) support came from cpufreq driver (which is platform
> dependent). Hence the data structure describing it is defined at its file.
>
> To allow support for either SW and HW (Intel) based boosting, the cpufreq
> core code has been extended to support both solutions.
>
> The main boost switch has been put at /sys/devices/system/cpu/cpufreq/boost.
Log requires some better paragraphs but I am not concerned about it for now.
> Signed-off-by: Lukasz Majewski <l.majewski(a)samsung.com>
> Signed-off-by: Myungjoo Ham <myungjoo.ham(a)samsung.com>
> ---
> drivers/cpufreq/cpufreq.c | 156 ++++++++++++++++++++++++++++++++++++++++++
> drivers/cpufreq/freq_table.c | 87 ++++++++++++++++++++++-
> include/linux/cpufreq.h | 35 +++++++++-
> 3 files changed, 275 insertions(+), 3 deletions(-)
My initial view on this patch is: "It is overly engineered and needs
to get simplified"
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index ca74e27..8cf9a92 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -133,6 +133,11 @@ bool have_governor_per_policy(void)
> return cpufreq_driver->have_governor_per_policy;
> }
>
> +/**
> + * Global definition of cpufreq_boost structure
> + */
> +struct cpufreq_boost *cpufreq_boost;
Probably just a 'static bool' here cpufreq_boost_enabled. Which takes
care of selection from sysfs.
> static struct cpufreq_governor *__find_governor(const char *str_governor)
> {
> @@ -761,6 +805,18 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
> if (cpufreq_set_drv_attr_files(policy, cpufreq_driver->attr))
> goto err_out_kobj_put;
>
> + if (cpufreq_driver->boost) {
> + if (sysfs_create_file(cpufreq_global_kobject,
> + &(global_boost.attr)))
This will report error for systems where we have two policy structures.
As we are creating something already present.
> + pr_warn("could not register global boost sysfs file\n");
> + else
> + pr_debug("registered global boost sysfs file\n");
Please make all your prints to print function name too:
pr_debug("%s: foo\n", __func__, foo);
> + if (cpufreq_set_drv_attr_files(policy,
> + cpufreq_driver->boost->attr))
Why is this required? Why do we want platforms to add some files
in sysfs?
> /*********************************************************************
> + * BOOST *
> + *********************************************************************/
> +int cpufreq_boost_trigger_state(struct cpufreq_policy *policy, int state)
> +{
> + struct cpufreq_boost *boost;
> + unsigned long flags;
> + int ret = 0;
> +
> + if (!policy || !policy->boost || !policy->boost->low_level_boost)
> + return -ENODEV;
> +
> + boost = policy->boost;
> + write_lock_irqsave(&cpufreq_driver_lock, flags);
> +
> + if (boost->status != state) {
> + policy->boost->status = state;
> + ret = boost->low_level_boost(policy, state);
> + if (ret) {
> + pr_err("BOOST cannot %sable low level code (%d)\n",
> + state ? "en" : "dis", ret);
> + return ret;
> + }
> + }
> +
> + write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> + pr_debug("cpufreq BOOST %sabled\n", state ? "en" : "dis");
> +
> + return 0;
> +}
> +
> +/**
> + * cpufreq_boost_notifier - notifier callback for cpufreq policy change.
> + * <at> nb: struct notifier_block * with callback info.
> + * <at> event: value showing cpufreq event for which this function invoked.
> + * <at> data: callback-specific data
> + */
> +static int cpufreq_boost_notifier(struct notifier_block *nb,
> + unsigned long event, void *data)
> +{
> + struct cpufreq_policy *policy = data;
> +
> + if (event == CPUFREQ_INCOMPATIBLE) {
> + if (!policy || !policy->boost)
> + return -ENODEV;
> +
> + if (policy->boost->status == CPUFREQ_BOOST_EN) {
> + pr_info("NOTIFIER BOOST: MAX: %d e:%lu cpu: %d\n",
> + policy->max, event, policy->cpu);
> + cpufreq_boost_trigger_state(policy, 0);
> + }
> + }
> +
> + return 0;
> +}
> +
> +/* Notifier for cpufreq policy change */
> +static struct notifier_block cpufreq_boost_notifier_block = {
> + .notifier_call = cpufreq_boost_notifier,
> +};
> +
> +int cpufreq_boost_init(struct cpufreq_policy *policy)
> +{
> + int ret = 0;
> +
> + if (!policy)
> + return -EINVAL;
Heh, policy can't be NULL here.
> + if (!cpufreq_driver->boost) {
> + pr_err("Boost mode not supported on this device\n");
Wow!! You want to screw everybody else's logs with this message.
Its not a crime if you don't have boost mode supported :)
Actually this routine must be called only if cpufreq_driver->boost
is valid.
> + return -ENODEV;
> + }
> +
> + policy->boost = cpufreq_boost = cpufreq_driver->boost;
Why are we copying same pointer to policy->boost? Driver is
passing pointer to a single memory location, just save it globally.
> + /* disable boost for newly created policy - as we e.g. change
> + governor */
> + policy->boost->status = CPUFREQ_BOOST_DIS;
Drivers supporting boost may want boost to be enabled by default,
maybe without any sysfs calls.
> + /* register policy notifier */
> + ret = cpufreq_register_notifier(&cpufreq_boost_notifier_block,
> + CPUFREQ_POLICY_NOTIFIER);
> + if (ret) {
> + pr_err("CPUFREQ BOOST notifier not registered.\n");
> + return ret;
> + }
> + /* add policy to policies list headed at struct cpufreq_boost */
> + list_add_tail(&policy->boost_list, &cpufreq_boost->policies);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(cpufreq_boost_init);
Why do we need to maintain a list of boost here? notifiers? complex :(
> +/*********************************************************************
> * REGISTER / UNREGISTER CPUFREQ DRIVER *
> *********************************************************************/
>
> @@ -1954,6 +2106,10 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
> pr_debug("unregistering driver %s\n", driver->name);
>
> subsys_interface_unregister(&cpufreq_interface);
> +
> + if (cpufreq_driver->boost)
> + sysfs_remove_file(cpufreq_global_kobject, &(global_boost.attr));
You haven't removed this from policy. Memory leak.
> unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
>
> write_lock_irqsave(&cpufreq_driver_lock, flags);
> diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
> index d7a7966..0e95524 100644
> --- a/drivers/cpufreq/freq_table.c
> +++ b/drivers/cpufreq/freq_table.c
> @@ -3,6 +3,8 @@
> *
> * Copyright (C) 2002 - 2003 Dominik Brodowski
> *
> + * Copyright (C) 2013 Lukasz Majewski <l.majewski(a)samsung.com>
> + *
You shouldn't add it unless you did some major work on this file. You aren't
maintaining this file in 2013.
> +static int cpufreq_frequency_table_skip_boost(struct cpufreq_policy *policy,
> + unsigned int index)
> +{
> + if (index == CPUFREQ_BOOST)
> + if (!policy->boost ||
This shouldn't be true. If index has got CPUFREQ_BOOST, then driver
has to support boost.
> + policy->boost->status == CPUFREQ_BOOST_DIS)
> + return 1;
> +
> + return 0;
> +}
> +
> +unsigned int
> +cpufreq_frequency_table_boost_max(struct cpufreq_frequency_table *freq_table)
> +{
> + int index, boost_freq_max;
> +
> + for (index = 0, boost_freq_max = 0;
> + freq_table[index].frequency != CPUFREQ_TABLE_END; index++)
> + if (freq_table[index].index == CPUFREQ_BOOST) {
> + if (freq_table[index].frequency > boost_freq_max)
> + boost_freq_max = freq_table[index].frequency;
> + }
> +
> + return boost_freq_max;
> +}
> +EXPORT_SYMBOL_GPL(cpufreq_frequency_table_boost_max);
why do we need this?
> /*
> * if you use these, you must assure that the frequency table is valid
> * all the time between get_attr and put_attr!
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 037d36a..1294c8c 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -88,6 +88,25 @@ struct cpufreq_real_policy {
> struct cpufreq_governor *governor; /* see below */
> };
>
> +#define CPUFREQ_BOOST_DIS (0)
> +#define CPUFREQ_BOOST_EN (1)
You don't need these.. Just create variable as bool and 0 & 1 would
be fine.
> +struct cpufreq_policy;
> +struct cpufreq_boost {
> + unsigned int max_boost_freq; /* maximum value of
> + * boosted freq */
> + unsigned int max_normal_freq; /* non boost max freq */
> + int status; /* status of boost */
> +
> + /* boost sysfs attributies */
> + struct freq_attr **attr;
> +
> + /* low-level trigger for boost */
> + int (*low_level_boost) (struct cpufreq_policy *policy, int state);
> +
> + struct list_head policies;
> +};
We don't need it. Just add two more fields to cpufreq_driver:
- have_boost_freqs and low_level_boost (maybe a better name.
What's its use?)
> struct cpufreq_policy {
> /* CPUs sharing clock, require sw coordination */
> cpumask_var_t cpus; /* Online CPUs only */
> @@ -113,6 +132,9 @@ struct cpufreq_policy {
>
> struct cpufreq_real_policy user_policy;
>
> + struct cpufreq_boost *boost;
> + struct list_head boost_list;
We don't need both of these.
> struct kobject kobj;
> struct completion kobj_unregister;
> };
> @@ -277,7 +302,6 @@ struct cpufreq_driver {
> int cpufreq_register_driver(struct cpufreq_driver *driver_data);
> int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
>
> -
??
> void cpufreq_notify_transition(struct cpufreq_policy *policy,
> struct cpufreq_freqs *freqs, unsigned int state);
>
> @@ -403,6 +427,9 @@ extern struct cpufreq_governor cpufreq_gov_conservative;
> #define CPUFREQ_ENTRY_INVALID ~0
> #define CPUFREQ_TABLE_END ~1
>
> +/* Define index for boost frequency */
> +#define CPUFREQ_BOOST ~2
s/CPUFREQ_BOOST/CPUFREQ_BOOST_FREQ
== Linus Walleij linusw ==
=== Highlights ===
* Wrote a patch series bringing runtime PM of pins
into the device core. Greg and Mark ACKed it
immediately so I have it queued in the pinctrl tree
now. Kevin and I continue to discuss runtime PM
implications and whether we can centralize it even
more. This basically have activated this long-dormant
blueprint:
https://blueprints.launchpad.net/linux-linaro/+spec/runtimepm-pinctrl
(WG leads: please approve this blueprint.)
* Olof J pulled my ux500-fixes branch to ARM SoC.
* Applied and queued various ux500 patches, collected
ACKs and sent five (5) pull requests for the following
branches on ux500:
ux500-core
ux500-clocksource
ux500-pinctrl
ux500-dma40
ux500-devicetree
I also have a ux500-defconfig branch, that will be
submitted later, turning on this and some more new
stuff that will hit the v3.11 merge window.
* Sent a pull request for the Integrator PCI DT patch
series to ARM SoC.
* Reviewed lots of pinctrl code. Qeueued some
pinctrl patches at least. A lot of the discussion revolves
around generic DT bindings for generic pin config.
* Put my nose into some submarine-type review of new
platforms like the Rockchip and HiSilicon.
=== Plans ===
* Finalize U300 DT+multiplatform patch set and send
a pull request for it. Utilizing the syscon MFD driver
this means I will actually look at regmap now!
* Start to delete Integrator board files and convert to
multiplatform once the PCI DT patches land in ARM
SoC.
* Convert Nomadik pinctrl driver to register GPIO ranges
from the gpiochip side.
* Test the PL08x patches on the Ericsson Research
PB11MPCore and submit platform data for using
pl08x DMA on that platform.
=== Issues ===
* Subsystem maintainers in the kernel community are
acting like Judge Dredd on DT review and commit issues,
as noted last week.
* Some impediments from internal turmoil @ST-Ericsson.
Thanks,
Linus Walleij
On 6 June 2013 15:25, Borislav Petkov <bp(a)suse.de> wrote:
> The correct "fix" for this whole deal is coupling cpufreq with
> the scheduler, as it has been said so many times before. You need
> "something" which can tell you whether raising the freq. is worth it or
> not (i.e. the process is waiting on IO or is executing instructions).
Linaro has got a blueprint in this direction but doesn't have any
proof of concept or RFC patches to share. But that will happen soon.
On 6 June 2013 12:37, Lukasz Majewski <l.majewski(a)samsung.com> wrote:
> Subject: cpufreq: Define cpufreq_set_drv_attr_files() to add per CPU sysfs attributes
Its not per-cpu. We just add it for policy->cpu and other routines
actually create links.
> The cpufreq_set_drv_attr_files() function creates sysfs file entry for
> each available CPU. With it in place it is possible to add different
> set of attributes without code duplication.
Not for each available cpu but are linked to a policy->kobj and so
shows up on each policy->cpus.
> Signed-off-by: Lukasz Majewski <l.majewski(a)samsung.com>
> Signed-off-by: Myungjoo Ham <myungjoo.ham(a)samsung.com>
> ---
> drivers/cpufreq/cpufreq.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 1b8a48e..ca74e27 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -730,12 +730,23 @@ static int cpufreq_add_dev_symlink(unsigned int cpu,
> return ret;
> }
>
> +static int cpufreq_set_drv_attr_files(struct cpufreq_policy *policy,
> + struct freq_attr **drv_attr)
> +{
> + while ((drv_attr) && (*drv_attr)) {
> + if (sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)))
> + return 1;
You are changing the semantics here. We used to return error
value from sysfs_create_file() and you are returning 1.
> + drv_attr++;
If drv_attr was valid initially, then drv_attr++ can't make it NULL.
So, we don't need to check validity of drv_attr for every loop.
Currently, the RTC IRQ is never wakeup-enabled so is not capable of
bringing the system out of suspend.
On OMAP platforms, we have gotten by without this because the TWL RTC
is on an I2C-connected chip which is capable of waking up the OMAP via
the IO ring when the OMAP is in low-power states.
However, if the OMAP suspends without hitting the low-power states
(and the IO ring is not enabled), RTC wakeups will not work because
the IRQ is not wakeup enabled.
To fix, ensure the RTC IRQ is wakeup enabled whenever the RTC alarm is
set.
Cc: Alessandro Zummo <a.zummo(a)towertech.it>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Tony Lindgren <tony(a)atomide.com>
Signed-off-by: Kevin Hilman <khilman(a)linaro.org>
---
drivers/rtc/rtc-twl.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 8751a52..bbda0fd 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -213,12 +213,24 @@ static int mask_rtc_irq_bit(unsigned char bit)
static int twl_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
{
+ struct platform_device *pdev = to_platform_device(dev);
+ int irq = platform_get_irq(pdev, 0);
+ static bool twl_rtc_wake_enabled;
int ret;
- if (enabled)
+ if (enabled) {
ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
- else
+ if (device_can_wakeup(dev) && !twl_rtc_wake_enabled) {
+ enable_irq_wake(irq);
+ twl_rtc_wake_enabled = true;
+ }
+ } else {
ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+ if (twl_rtc_wake_enabled) {
+ disable_irq_wake(irq);
+ twl_rtc_wake_enabled = false;
+ }
+ }
return ret;
}
--
1.8.2
I have faced a sequence where the Idle Load Balance was sometime not
triggered for a while on my platform.
CPU 0 and CPU 1 are running tasks and CPU 2 is idle
CPU 1 kicks the Idle Load Balance
CPU 1 selects CPU 2 as the new Idle Load Balancer
CPU 1 sets NOHZ_BALANCE_KICK for CPU 2
CPU 1 sends a reschedule IPI to CPU 2
While CPU 2 wakes up, CPU 0 or CPU 1 migrates a waking task A on CPU 2
CPU 2 finally wakes up, runs task A and discards the Idle Load Balance
Task A quickly goes back to sleep (before a tick occurs on CPU 2)
CPU 2 goes back to idle with NOHZ_BALANCE_KICK set
Whenever CPU 2 will be selected for the ILB, reschedule IPI will be not
sent to CPU2, which is idle, because NOHZ_BALANCE_KICK is already set
and no Idle Load Balance will be performed.
We must wait for the sched softirq to be raised on CPU 2 thanks to
another part of the kernel to clear NOHZ_BALANCE_KICKand come back to
a normal situation.
The proposed solution clears NOHZ_BALANCE_KICK in schedule_ipi if
we can't raise the sched_softirq for the Idle Load Balance.
Signed-off-by: Vincent Guittot <vincent.guittot(a)linaro.org>
---
kernel/sched/core.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 58453b8..51fc715 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1420,7 +1420,8 @@ void scheduler_ipi(void)
if (unlikely(got_nohz_idle_kick() && !need_resched())) {
this_rq()->idle_balance = 1;
raise_softirq_irqoff(SCHED_SOFTIRQ);
- }
+ } else
+ clear_bit(NOHZ_BALANCE_KICK, nohz_flags(smp_processor_id()));
irq_exit();
}
--
1.7.9.5
We are saving first scheduling domain for a cpu in build_sched_domains() by
iterating over the nested sd->child list. We don't actually need to do it this
way.
*per_cpu_ptr(d.sd, i) is guaranteed to be NULL in the beginning as we have
called __visit_domain_allocation_hell() which does a memset to zero for struct
s_data.
So, save pointer to first SD while running the iteration loop over tl's.
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
---
kernel/sched/core.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 58453b8..638f6cb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6533,16 +6533,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
sd = NULL;
for (tl = sched_domain_topology; tl->init; tl++) {
sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+ if (!*per_cpu_ptr(d.sd, i))
+ *per_cpu_ptr(d.sd, i) = sd;
if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
sd->flags |= SD_OVERLAP;
if (cpumask_equal(cpu_map, sched_domain_span(sd)))
break;
}
-
- while (sd->child)
- sd = sd->child;
-
- *per_cpu_ptr(d.sd, i) = sd;
}
/* Build the groups for the domains */
--
1.7.12.rc2.18.g61b472e
Commit bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92 brought the multiple driver
support. The code added a couple of new API to register the driver per cpu.
That led to some code complexity to handle the kernel config options when
the multiple driver support is enabled or not, which is not really necessary.
The code has to be compatible when the multiple driver support is not enabled,
and the multiple driver support has to be compatible with the old api.
This patch removes this API, which is not yet used by any driver but needed
for the HMP cpuidle drivers which will come soon, and replaces its usage
by a cpumask pointer in the cpuidle driver structure telling what cpus are
handled by the driver. That let the API cpuidle_[un]register_driver to be used
for the multipled driver support and also the cpuidle_[un]register functions,
added recently in the cpuidle framework.
The current code, a bit poor in comments, has been commented and simplified.
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
---
[V2]:
- fixed bad refcount check
- inverted clockevent notify off order at unregister time
drivers/cpuidle/cpuidle.c | 4 +-
drivers/cpuidle/driver.c | 324 ++++++++++++++++++++++++++++-----------------
include/linux/cpuidle.h | 21 +--
3 files changed, 213 insertions(+), 136 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c3a93fe..fdc432f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -466,7 +466,7 @@ void cpuidle_unregister(struct cpuidle_driver *drv)
int cpu;
struct cpuidle_device *device;
- for_each_possible_cpu(cpu) {
+ for_each_cpu(cpu, drv->cpumask) {
device = &per_cpu(cpuidle_dev, cpu);
cpuidle_unregister_device(device);
}
@@ -498,7 +498,7 @@ int cpuidle_register(struct cpuidle_driver *drv,
return ret;
}
- for_each_possible_cpu(cpu) {
+ for_each_cpu(cpu, drv->cpumask) {
device = &per_cpu(cpuidle_dev, cpu);
device->cpu = cpu;
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 8dfaaae..0268346 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -18,206 +18,266 @@
DEFINE_SPINLOCK(cpuidle_driver_lock);
-static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu);
-static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu);
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
-static void cpuidle_setup_broadcast_timer(void *arg)
+static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers);
+
+/**
+ * __cpuidle_get_cpu_driver: returns the cpuidle driver tied with the specified
+ * cpu.
+ *
+ * @cpu: an integer specifying the cpu number
+ *
+ * Returns a pointer to struct cpuidle_driver, NULL if no driver has been
+ * registered for this driver
+ */
+static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
{
- int cpu = smp_processor_id();
- clockevents_notify((long)(arg), &cpu);
+ return per_cpu(cpuidle_drivers, cpu);
}
-static void __cpuidle_driver_init(struct cpuidle_driver *drv, int cpu)
+/**
+ * __cpuidle_set_driver: assign to the per cpu variable the driver pointer for
+ * each cpu the driver is assigned to with the cpumask.
+ *
+ * @drv: a pointer to a struct cpuidle_driver
+ *
+ * Returns 0 on success, < 0 otherwise
+ */
+static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
{
- int i;
+ int cpu;
- drv->refcnt = 0;
+ for_each_cpu(cpu, drv->cpumask) {
- for (i = drv->state_count - 1; i >= 0 ; i--) {
+ if (__cpuidle_get_cpu_driver(cpu))
+ return -EBUSY;
- if (!(drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP))
- continue;
-
- drv->bctimer = 1;
- on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer,
- (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1);
- break;
+ per_cpu(cpuidle_drivers, cpu) = drv;
}
+
+ return 0;
}
-static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu)
+/**
+ * __cpuidle_unset_driver: for each cpu the driver is handling, set the per cpu
+ * variable driver to NULL.
+ *
+ * @drv: a pointer to a struct cpuidle_driver
+ */
+static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
{
- if (!drv || !drv->state_count)
- return -EINVAL;
-
- if (cpuidle_disabled())
- return -ENODEV;
-
- if (__cpuidle_get_cpu_driver(cpu))
- return -EBUSY;
+ int cpu;
- __cpuidle_driver_init(drv, cpu);
+ for_each_cpu(cpu, drv->cpumask) {
- __cpuidle_set_cpu_driver(drv, cpu);
+ if (drv != __cpuidle_get_cpu_driver(cpu))
+ continue;
- return 0;
+ per_cpu(cpuidle_drivers, cpu) = NULL;
+ }
}
-static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu)
-{
- if (drv != __cpuidle_get_cpu_driver(cpu))
- return;
+#else
- if (!WARN_ON(drv->refcnt > 0))
- __cpuidle_set_cpu_driver(NULL, cpu);
+static struct cpuidle_driver *cpuidle_curr_driver;
- if (drv->bctimer) {
- drv->bctimer = 0;
- on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer,
- (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1);
- }
+/**
+ * __cpuidle_get_cpu_driver: returns the global cpuidle driver pointer.
+ *
+ * @cpu: an integer specifying the cpu number, this parameter is ignored
+ *
+ * Returns a pointer to a struct cpuidle_driver, NULL if no driver was
+ * previously registered
+ */
+static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
+{
+ return cpuidle_curr_driver;
}
-#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+/**
+ * __cpuidle_set_driver: assign the cpuidle driver pointer to the global cpuidle
+ * driver variable.
+ *
+ * @drv: a pointer to a struct cpuidle_driver
+ *
+ * Returns 0 on success, < 0 otherwise
+ */
+static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
+{
+ if (cpuidle_curr_driver)
+ return -EBUSY;
-static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers);
+ cpuidle_curr_driver = drv;
-static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
-{
- per_cpu(cpuidle_drivers, cpu) = drv;
+ return 0;
}
-static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
+/**
+ * __cpuidle_unset_driver: reset the global cpuidle driver variable if the
+ * cpuidle driver pointer match it.
+ *
+ * @drv: a pointer to a struct cpuidle_driver
+ */
+static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
{
- return per_cpu(cpuidle_drivers, cpu);
+ if (drv == cpuidle_curr_driver)
+ cpuidle_curr_driver = NULL;
}
-static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv)
+#endif
+
+/**
+ * cpuidle_setup_broadcast_timer: set the broadcast timer notification for the
+ * current cpu. This function is called per cpu context invoked by a smp cross
+ * call. It is not supposed to be called directly.
+ *
+ * @arg: a void pointer, actually used to match the smp cross call api but used
+ * as a long with two values:
+ * - CLOCK_EVT_NOTIFY_BROADCAST_ON
+ * - CLOCK_EVT_NOTIFY_BROADCAST_OFF
+ */
+static void cpuidle_setup_broadcast_timer(void *arg)
{
- int cpu;
- for_each_present_cpu(cpu)
- __cpuidle_unregister_driver(drv, cpu);
+ int cpu = smp_processor_id();
+ clockevents_notify((long)(arg), &cpu);
}
-static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
+/**
+ * __cpuidle_driver_init: initialize the driver internal data.
+ *
+ * @drv: a valid pointer to a struct cpuidle_driver
+ *
+ * Returns 0 on success, < 0 otherwise
+ */
+static int __cpuidle_driver_init(struct cpuidle_driver *drv)
{
- int ret = 0;
- int i, cpu;
+ int i;
- for_each_present_cpu(cpu) {
- ret = __cpuidle_register_driver(drv, cpu);
- if (ret)
- break;
- }
+ drv->refcnt = 0;
- if (ret)
- for_each_present_cpu(i) {
- if (i == cpu)
- break;
- __cpuidle_unregister_driver(drv, i);
- }
+ /*
+ * we default here to all cpu possible because if the kernel
+ * boots with some cpus offline and then we online one of them
+ * the cpu notifier won't know which driver to assign
+ */
+ if (!drv->cpumask)
+ drv->cpumask = (struct cpumask *)cpu_possible_mask;
+
+ /*
+ * we look for the timer stop flag in the different states,
+ * so know we have to setup the broadcast timer. The loop is
+ * in reverse order, because usually the deeper state has this
+ * flag set
+ */
+ for (i = drv->state_count - 1; i >= 0 ; i--) {
+ if (!(drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP))
+ continue;
- return ret;
+ drv->bctimer = 1;
+ break;
+ }
+
+ return 0;
}
-int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu)
+/**
+ * __cpuidle_register_driver: do some sanity checks, initializes the driver,
+ * assign the driver to the global cpuidle driver variable(s) and setup the
+ * broadcast timer if the cpuidle driver has some states which shutdown the
+ * local timer.
+ *
+ * @drv: a valid pointer to a struct cpuidle_driver
+ *
+ * Returns 0 on success, < 0 otherwise
+ */
+static int __cpuidle_register_driver(struct cpuidle_driver *drv)
{
int ret;
- spin_lock(&cpuidle_driver_lock);
- ret = __cpuidle_register_driver(drv, cpu);
- spin_unlock(&cpuidle_driver_lock);
+ if (!drv || !drv->state_count)
+ return -EINVAL;
- return ret;
-}
+ if (cpuidle_disabled())
+ return -ENODEV;
-void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu)
-{
- spin_lock(&cpuidle_driver_lock);
- __cpuidle_unregister_driver(drv, cpu);
- spin_unlock(&cpuidle_driver_lock);
-}
+ ret = __cpuidle_driver_init(drv);
+ if (ret)
+ return ret;
-/**
- * cpuidle_register_driver - registers a driver
- * @drv: the driver
- */
-int cpuidle_register_driver(struct cpuidle_driver *drv)
-{
- int ret;
+ ret = __cpuidle_set_driver(drv);
+ if (ret)
+ return ret;
- spin_lock(&cpuidle_driver_lock);
- ret = __cpuidle_register_all_cpu_driver(drv);
- spin_unlock(&cpuidle_driver_lock);
+ if (drv->bctimer)
+ on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
+ (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1);
- return ret;
+ return 0;
}
-EXPORT_SYMBOL_GPL(cpuidle_register_driver);
/**
- * cpuidle_unregister_driver - unregisters a driver
- * @drv: the driver
+ * __cpuidle_unregister_driver: checks the driver is no longer in use, reset the
+ * global cpuidle driver variable(s) and disable the timer broadcast
+ * notification mechanism if it was in use.
+ *
+ * @drv: a valid pointer to a struct cpuidle_driver
+ *
*/
-void cpuidle_unregister_driver(struct cpuidle_driver *drv)
+static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
{
- spin_lock(&cpuidle_driver_lock);
- __cpuidle_unregister_all_cpu_driver(drv);
- spin_unlock(&cpuidle_driver_lock);
-}
-EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
-
-#else
-
-static struct cpuidle_driver *cpuidle_curr_driver;
+ if (WARN_ON(drv->refcnt > 0))
+ return;
-static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
-{
- cpuidle_curr_driver = drv;
-}
+ if (drv->bctimer) {
+ drv->bctimer = 0;
+ on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
+ (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1);
+ }
-static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
-{
- return cpuidle_curr_driver;
+ __cpuidle_unset_driver(drv);
}
/**
- * cpuidle_register_driver - registers a driver
- * @drv: the driver
+ * cpuidle_register_driver: registers a driver by taking a lock to prevent
+ * multiple callers to [un]register a driver at the same time.
+ *
+ * @drv: a pointer to a valid struct cpuidle_driver
+ *
+ * Returns 0 on success, < 0 otherwise
*/
int cpuidle_register_driver(struct cpuidle_driver *drv)
{
- int ret, cpu;
+ int ret;
- cpu = get_cpu();
spin_lock(&cpuidle_driver_lock);
- ret = __cpuidle_register_driver(drv, cpu);
+ ret = __cpuidle_register_driver(drv);
spin_unlock(&cpuidle_driver_lock);
- put_cpu();
return ret;
}
EXPORT_SYMBOL_GPL(cpuidle_register_driver);
/**
- * cpuidle_unregister_driver - unregisters a driver
- * @drv: the driver
+ * cpuidle_unregister_driver: unregisters a driver by taking a lock to prevent
+ * multiple callers to [un]register a driver at the same time. The specified
+ * driver must match the driver currently registered.
+ *
+ * @drv: a pointer to a valid struct cpuidle_driver
*/
void cpuidle_unregister_driver(struct cpuidle_driver *drv)
{
- int cpu;
-
- cpu = get_cpu();
spin_lock(&cpuidle_driver_lock);
- __cpuidle_unregister_driver(drv, cpu);
+ __cpuidle_unregister_driver(drv);
spin_unlock(&cpuidle_driver_lock);
- put_cpu();
}
EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
-#endif
/**
- * cpuidle_get_driver - return the current driver
+ * cpuidle_get_driver: returns the driver tied with the current cpu.
+ *
+ * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered
*/
struct cpuidle_driver *cpuidle_get_driver(void)
{
@@ -233,7 +293,12 @@ struct cpuidle_driver *cpuidle_get_driver(void)
EXPORT_SYMBOL_GPL(cpuidle_get_driver);
/**
- * cpuidle_get_cpu_driver - return the driver tied with a cpu
+ * cpuidle_get_cpu_driver: returns the driver registered with a cpu.
+ *
+ * @dev: a valid pointer to a struct cpuidle_device
+ *
+ * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered
+ * for the specified cpu
*/
struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
{
@@ -244,6 +309,13 @@ struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
}
EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);
+/**
+ * cpuidle_driver_ref: gets a refcount for the driver. Note this function takes
+ * a refcount for the driver assigned to the current cpu.
+ *
+ * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered
+ * for the current cpu
+ */
struct cpuidle_driver *cpuidle_driver_ref(void)
{
struct cpuidle_driver *drv;
@@ -257,6 +329,10 @@ struct cpuidle_driver *cpuidle_driver_ref(void)
return drv;
}
+/**
+ * cpuidle_driver_unref: puts down the refcount for the driver. Note this
+ * function decrement the refcount for the driver assigned to the current cpu.
+ */
void cpuidle_driver_unref(void)
{
struct cpuidle_driver *drv = cpuidle_get_driver();
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 8f04062..63d78b1 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -101,16 +101,20 @@ static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
****************************/
struct cpuidle_driver {
- const char *name;
- struct module *owner;
- int refcnt;
+ const char *name;
+ struct module *owner;
+ int refcnt;
/* used by the cpuidle framework to setup the broadcast timer */
- unsigned int bctimer:1;
+ unsigned int bctimer:1;
+
/* states array must be ordered in decreasing power consumption */
- struct cpuidle_state states[CPUIDLE_STATE_MAX];
- int state_count;
- int safe_state_index;
+ struct cpuidle_state states[CPUIDLE_STATE_MAX];
+ int state_count;
+ int safe_state_index;
+
+ /* the driver handles the cpus in cpumask */
+ struct cpumask *cpumask;
};
#ifdef CONFIG_CPU_IDLE
@@ -135,9 +139,6 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev);
extern int cpuidle_play_dead(void);
extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
-extern int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu);
-extern void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu);
-
#else
static inline void disable_cpuidle(void) { }
static inline int cpuidle_idle_call(void) { return -ENODEV; }
--
1.7.9.5