Each governor is suitable for different kernel configurations: the menu
governor suits better for a tickless system, while the ladder governor fits
better for a periodic timer tick system.
The Kconfig does not allow to [un]select a governor, thus both are compiled in
the kernel but the init order makes the menu governor to be the last one to be
registered, so becoming the default. The only way to switch back to the ladder
governor is to enable the sysfs governor switch in the kernel command line.
Because it seems nobody complained about this, the menu governor is used by
default most of the time on the system, having both governors is not really
necessary on a tickless system but there isn't a config option to disable one
or another governor.
Create a submenu for cpuidle and add a label for each governor, so we can see
the option in the menu config and enable/disable it.
The governors will be enabled depending on the CONFIG_NO_HZ option:
- If CONFIG_NO_HZ is set, then the menu governor is selected and the ladder
governor is optional, defaulting to 'no'
- If CONFIG_NO_HZ is not set, then the ladder governor is selected and the
menu governor is optional, defaulting to 'no'
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
---
drivers/cpuidle/Kconfig | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c4cc27e..90c2f39 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -1,7 +1,9 @@
-config CPU_IDLE
+menuconfig CPU_IDLE
bool "CPU idle PM support"
default y if ACPI || PPC_PSERIES
+ select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
+ select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE)
help
CPU idle is a generic framework for supporting software-controlled
idle processor power management. It includes modular cross-platform
@@ -9,9 +11,10 @@ config CPU_IDLE
If you're using an ACPI-enabled platform, you should say Y here.
+if CPU_IDLE
+
config CPU_IDLE_MULTIPLE_DRIVERS
bool "Support multiple cpuidle drivers"
- depends on CPU_IDLE
default n
help
Allows the cpuidle framework to use different drivers for each CPU.
@@ -19,24 +22,19 @@ config CPU_IDLE_MULTIPLE_DRIVERS
states. If unsure say N.
config CPU_IDLE_GOV_LADDER
- bool
- depends on CPU_IDLE
- default y
+ bool "Ladder governor (for periodic timer tick)"
+ default n if (NO_HZ || NO_HZ_IDLE)
config CPU_IDLE_GOV_MENU
- bool
- depends on CPU_IDLE && NO_HZ
- default y
+ bool "Menu governor (for tickless system)"
+ default n if (!NO_HZ && !NO_HZ_IDLE)
config ARCH_NEEDS_CPU_IDLE_COUPLED
def_bool n
-if CPU_IDLE
-
config CPU_IDLE_CALXEDA
bool "CPU Idle Driver for Calxeda processors"
depends on ARCH_HIGHBANK
help
Select this to enable cpuidle on Calxeda processors.
-
endif
--
1.7.9.5
This patch adds support for defining and passing earlyprintk
related information i.e. device and address information via
device tree by adding it inside "chosen" node.
This will help user to just specify "earlyprintk" from bootargs
without actually knowing the address and device to enable
earlyprintk.
Mechanism:
One can just append earlyprintk=device-type,address (same as we pass
through command line) in "/chosen" node to notify kernel which is the
earlyprintk device and what is its address.
Backward Compatibility:
This patch also allows existing method of specifying earlyprintk
parameter via bootargs.
Existing method i.e. passing via bootargs will still have precedence
over device tree i.e. if one specifies earlyprintk=device-type,address
in bootargs then kernel will use information from bootargs instead of
device tree.
If user just specifies earlyprintk (without =...) then kernel will
look for device tree earlyprintk parameter.
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar(a)linaro.org>
Signed-off-by: Anup Patel <anup.patel(a)linaro.org>
---
arch/arm64/kernel/early_printk.c | 7 +++++++
arch/arm64/kernel/setup.c | 22 +++++++++++++++++++++-
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c
index fbb6e18..4e6f845 100644
--- a/arch/arm64/kernel/early_printk.c
+++ b/arch/arm64/kernel/early_printk.c
@@ -29,6 +29,8 @@
static void __iomem *early_base;
static void (*printch)(char ch);
+extern char *earlyprintk_dt_args;
+
/*
* PL011 single character TX.
*/
@@ -116,6 +118,11 @@ static int __init setup_early_printk(char *buf)
phys_addr_t paddr = 0;
if (!buf) {
+ /* Try to check if Device Tree has this argument or not ? */
+ buf = earlyprintk_dt_args;
+ }
+
+ if (!buf) {
pr_warning("No earlyprintk arguments passed.\n");
return 0;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 6a9a532..94ce7a9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -60,6 +60,9 @@ EXPORT_SYMBOL(processor_id);
unsigned int elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
+char *earlyprintk_dt_args;
+EXPORT_SYMBOL_GPL(earlyprintk_dt_args);
+
static const char *cpu_name;
static const char *machine_name;
phys_addr_t __fdt_pointer __initdata;
@@ -122,6 +125,23 @@ static void __init setup_processor(void)
elf_hwcap = 0;
}
+int __init early_init_dt_scan_chosen_arm64(unsigned long node,
+ const char *uname,
+ int depth, void *data)
+{
+ char *prop;
+
+ /* Check if this is chosen node */
+ if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "earlyprintk", NULL);
+ if (prop)
+ earlyprintk_dt_args = prop;
+
+ return 1;
+}
+
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
struct boot_param_header *devtree;
@@ -165,7 +185,7 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
pr_info("Machine: %s\n", machine_name);
/* Retrieve various information from the /chosen node */
- of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+ of_scan_flat_dt(early_init_dt_scan_chosen_arm64, boot_command_line);
/* Initialize {size,address}-cells info */
of_scan_flat_dt(early_init_dt_scan_root, NULL);
/* Setup memory, calling early_init_dt_add_memory_arch */
--
1.7.9.5
Each governor is suitable for different kernel configurations: the menu
governor suits better for a tickless system, while the ladder governor fits
better for a periodic timer tick system.
The Kconfig does not allow to [un]select a governor, thus both are compiled in
the kernel but the init order makes the menu governor to be the last one to be
registered, so becoming the default. The only way to switch back to the ladder
governor is to enable the sysfs governor switch in the kernel command line.
Because it seems nobody complained about this, the menu governor is used by
default most of the time on the system, having both governors is not really
necessary on a tickless system but there isn't a config option to disable one
or another governor.
Create a submenu for cpuidle and add a label for each governor, so we can see
the option in the menu config and enable/disable it.
The governors will be enabled depending on the CONFIG_NO_HZ option:
- If CONFIG_NO_HZ is set, then the menu governor is selected and the ladder
governor is optional, defaulting to 'yes'
- If CONFIG_NO_HZ is not set, then the ladder governor is selected and the
menu governor is optional, defaulting to 'yes'
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
---
drivers/cpuidle/Kconfig | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c4cc27e..e997f15 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -1,7 +1,9 @@
-config CPU_IDLE
+menuconfig CPU_IDLE
bool "CPU idle PM support"
default y if ACPI || PPC_PSERIES
+ select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
+ select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE)
help
CPU idle is a generic framework for supporting software-controlled
idle processor power management. It includes modular cross-platform
@@ -9,9 +11,10 @@ config CPU_IDLE
If you're using an ACPI-enabled platform, you should say Y here.
+if CPU_IDLE
+
config CPU_IDLE_MULTIPLE_DRIVERS
bool "Support multiple cpuidle drivers"
- depends on CPU_IDLE
default n
help
Allows the cpuidle framework to use different drivers for each CPU.
@@ -19,24 +22,19 @@ config CPU_IDLE_MULTIPLE_DRIVERS
states. If unsure say N.
config CPU_IDLE_GOV_LADDER
- bool
- depends on CPU_IDLE
+ bool "Ladder governor (for periodic timer tick)"
default y
config CPU_IDLE_GOV_MENU
- bool
- depends on CPU_IDLE && NO_HZ
+ bool "Menu governor (for tickless system)"
default y
config ARCH_NEEDS_CPU_IDLE_COUPLED
def_bool n
-if CPU_IDLE
-
config CPU_IDLE_CALXEDA
bool "CPU Idle Driver for Calxeda processors"
depends on ARCH_HIGHBANK
help
Select this to enable cpuidle on Calxeda processors.
-
endif
--
1.7.9.5
=== Highlights ===
* Had a good call with Arnd, Jesse, Serban and Rebecca on issues facing
upstreaming ION
* Refactored the ION patchset trying to simplify things a bit, sent out
draft to Jesse, Arnd and Serban
* *Finally* got around to finishing and posting my writeup on earlysuspend
https://plus.google.com/u/0/111524780435806926688/posts/RCV8EP3hFEm
* Responded to community questions from my lsf-mm summary.
* Sent the patch "Revert ARM: Make low-level printk work" patch to AOSP,
not sure if Arve will merge it.
* Reviewed Dmitry's VFAT_GET_VOLUME_ID patches
* Reviewed blueprints and sent out weekly Android upstreaming sub-team
status
* Lots of time reviewing the ION code to better understand current
issues prior to meeting.
=== Plans ===
* Get through the pending patches in my inbox and queue them for upstream
* More work on refactoring the ION patchset and addressing some of the
bigger issues from our discussion.
* Get back to Minchan on some of his private volatile range questions
=== Issues ===
* One of my git.linaro.org repos had some strange permission changes on
some of the objects, which is keeping me from being able to push changes
out. Filed an IT ticket
== Linus Walleij linusw ==
=== Highlights ===
* Collected and prepared a set of pinctrl fixes and sent
pull request to Torvalds. He pulled them in.
* Collected and prepared a set of GPIO fixes and sent
pull request to Torvalds. He pulled them in.
* Continued to apply DMA40 patches from Lee on a
specific DMA40 branch targeted for ARM SoC. Prepared
a first pull request with the first set of Lee's patches
after Vinod and Herbert ACKed them.
* Reviewed lots of pinctrl code, and some GPIO code.
* Prepared and sent a clocks-to-devicetree patch set for
the U300. (Tying up the sack on U300.) This is now
under review.
* Sent a pull request for the queued patches to move
Nomadik to use device tree for the clocks.
* Fixed ethernet to work in Nomadik S8815.
* Poked around with Nomadik S8815 user LED, and
user key using the standard GPIO bindings.
* Converted Nomadik to use DT for pin control.
* Asked for a NHK15 board to fix the support for this
board as well.
* Being pleased with Lee & Fabio taking on ever more
complex tasks and doing a good job overall for ux500.
=== Plans ===
* Finalize U300 DT+multiplatform patch set. Maybe send
a partial pull request with the stuff I have.
* Integrate Integrator PCI patch set, then start to delete
board files and convert to multiplatform.
* Convert Nomadik pinctrl driver to register GPIO ranges
from the gpiochip side.
* Test the PL08x patches on the Ericsson Research
PB11MPCore and submit platform data for using
pl08x DMA on that platform.
* Get hands dirty with regmap.
=== Issues ===
* Subsystem maintainers in the kernel community are forced
to act as standardization comittee for device tree bindings
due to lack of review power. As they have to merge the
bindings in the end, reviewing fall upward to the subsystem
maintainers, this is something of a workload issue.
* Need a PCI branch at ARM SoC to request Integrator PCI
DT patches to be pulled into. (Still blocked on this for
Integrator single zImage.)
* Some impediments from internal turmoil @ST-Ericsson.
Thanks,
Linus Walleij
This patch series does the following:
1) Factors out possible common code, unifies the clk strutures used
for PLL35XX & PLL36XX and usues clk->base instead of clk->con0
2) Defines a common rate_table which will contain recommended p, m, s and k
values for supported rates that needs to be changed for changing
corresponding PLL's rate
3) Adds set_rate() and round_rate() clk_ops for PLL35XX and PLL36XXX
Is rebased on branch kgene's "for-next"
https://git.kernel.org/cgit/linux/kernel/git/kgene/linux-samsung.git/log/?h…
And tested these patch on chromebook for EPLL settings for Audio on our chrome tree.
Vikas Sajjan (2):
clk: samsung: Add set_rate() clk_ops for PLL36XX
clk: samsung: Add EPLL and VPLL freq table for exynos5250 SoC
Yadwinder Singh Brar (3):
clk: samsung: Use clk->base instead of directly using clk->con0 for
PLL3XXX
clk: samsung: Add support to register rate_table for PLL3XXX
clk: samsung: Add set_rate() clk_ops for PLL35XX
drivers/clk/samsung/clk-exynos4.c | 10 +-
drivers/clk/samsung/clk-exynos5250.c | 29 +++-
drivers/clk/samsung/clk-pll.c | 243 ++++++++++++++++++++++++++++++----
drivers/clk/samsung/clk-pll.h | 27 +++-
4 files changed, 272 insertions(+), 37 deletions(-)
--
1.7.9.5
Hi,
This patchset takes advantage of the new per-task load tracking that is
available in the kernel for packing the tasks in as few as possible
CPU/Cluster/Core. It has got 2 packing modes:
-The 1st mode packs the small tasks when the system is not too busy. The main
goal is to reduce the power consumption in the low system load use cases by
minimizing the number of power domain that are enabled but it also keeps the
default behavior which is performance oriented.
-The 2nd mode packs all tasks in as few as possible power domains in order to
improve the power consumption of the system but at the cost of possible
performance decrease because of the increase of the rate of ressources sharing
compared to the default mode.
The packing is done in 3 steps (the last step is only applicable for the
agressive packing mode):
The 1st step looks for the best place to pack tasks in a system according to
its topology and it defines a 1st pack buddy CPU for each CPU if there is one
available. The policy for defining a buddy CPU is that we want to pack at
levels where a group of CPU can be power gated independently from others. To
describe this capability, a new flag SD_SHARE_POWERDOMAIN has been introduced,
that is used to indicate whether the groups of CPUs of a scheduling domain
share their power state. By default, this flag is set in all sched_domain in
order to keep unchanged the current behavior of the scheduler and only ARM
platform clears the SD_SHARE_POWERDOMAIN flag for MC and CPU level.
In a 2nd step, the scheduler checks the load average of a task which wakes up
as well as the load average of the buddy CPU and it can decide to migrate the
light tasks on a not busy buddy. This check is done during the wake up because
small tasks tend to wake up between periodic load balance and asynchronously
to each other which prevents the default mechanism to catch and migrate them
efficiently. A light task is defined by a runnable_avg_sum that is less than
20% of the runnable_avg_period. In fact, the former condition encloses 2 ones:
The average CPU load of the task must be less than 20% and the task must have
been runnable less than 10ms when it woke up last time in order to be
electable for the packing migration. So, a task than runs 1 ms each 5ms will
be considered as a small task but a task that runs 50 ms with a period of
500ms, will not.
Then, the business of the buddy CPU depends of the load average for the rq and
the number of running tasks. A CPU with a load average greater than 50% will
be considered as busy CPU whatever the number of running tasks is and this
threshold will be reduced by the number of running tasks in order to not
increase too much the wake up latency of a task. When the buddy CPU is busy,
the scheduler falls back to default CFS policy.
The 3rd step is only used when the agressive packing mode is enable. In this
case, the CPUs pack their tasks in their buddy until they becomes full. Unlike
the previous step, we can't keep the same buddy so we update it during load
balance. During the periodic load balance, the scheduler computes the activity
of the system thanks the runnable_avg_sum and the cpu_power of all CPUs and
then it defines the CPUs that will be used to handle the current activity. The
selected CPUs will be their own buddy and will participate to the default
load balancing mecanism in order to share the tasks in a fair way, whereas the
not selected CPUs will not, and their buddy will be the last selected CPU.
The behavior can be summarized as: The scheduler defines how many CPUs are
required to handle the current activity, keeps the tasks on these CPUS and
perform normal load balancing (or any evolution of the current load balancer
like the use of runnable load avg from Alex https://lkml.org/lkml/2013/4/1/580)
on this limited number of CPUs . Like the other steps, the CPUs are selected to
minimize the number of power domain that must stay on.
Change since V3:
- Take into account comments on previous version.
- Add an agressive packing mode and a knob to select between the various mode
Change since V2:
- Migrate only a task that wakes up
- Change the light tasks threshold to 20%
- Change the loaded CPU threshold to not pull tasks if the current number of
running tasks is null but the load average is already greater than 50%
- Fix the algorithm for selecting the buddy CPU.
Change since V1:
Patch 2/6
- Change the flag name which was not clear. The new name is
SD_SHARE_POWERDOMAIN.
- Create an architecture dependent function to tune the sched_domain flags
Patch 3/6
- Fix issues in the algorithm that looks for the best buddy CPU
- Use pr_debug instead of pr_info
- Fix for uniprocessor
Patch 4/6
- Remove the use of usage_avg_sum which has not been merged
Patch 5/6
- Change the way the coherency of runnable_avg_sum and runnable_avg_period is
ensured
Patch 6/6
- Use the arch dependent function to set/clear SD_SHARE_POWERDOMAIN for ARM
platform
Previous results for v3:
This series has been tested with hackbench on ARM platform and the results
don't show any performance regression
Hackbench 3.9-rc2 +patches
Mean Time (10 tests): 2.048 2.015
stdev : 0.047 0.068
Previous results for V2:
This series has been tested with MP3 play back on ARM platform:
TC2 HMP (dual CA-15 and 3xCA-7 cluster).
The measurements have been done on an Ubuntu image during 60 seconds of
playback and the result has been normalized to 100.
| CA15 | CA7 | total |
-------------------------------------
default | 81 | 97 | 178 |
pack | 13 | 100 | 113 |
-------------------------------------
Previous results for V1:
The patch-set has been tested on ARM platforms: quad CA-9 SMP and TC2 HMP
(dual CA-15 and 3xCA-7 cluster). For ARM platform, the results have
demonstrated that it's worth packing small tasks at all topology levels.
The performance tests have been done on both platforms with sysbench. The
results don't show any performance regressions. These results are aligned with
the policy which uses the normal behavior with heavy use cases.
test: sysbench --test=cpu --num-threads=N --max-requests=R run
Results below is the average duration of 3 tests on the quad CA-9.
default is the current scheduler behavior (pack buddy CPU is -1)
pack is the scheduler with the pack mechanism
| default | pack |
-----------------------------------
N=8; R=200 | 3.1999 | 3.1921 |
N=8; R=2000 | 31.4939 | 31.4844 |
N=12; R=200 | 3.2043 | 3.2084 |
N=12; R=2000 | 31.4897 | 31.4831 |
N=16; R=200 | 3.1774 | 3.1824 |
N=16; R=2000 | 31.4899 | 31.4897 |
-----------------------------------
The power consumption tests have been done only on TC2 platform which has got
accessible power lines and I have used cyclictest to simulate small tasks. The
tests show some power consumption improvements.
test: cyclictest -t 8 -q -e 1000000 -D 20 & cyclictest -t 8 -q -e 1000000 -D 20
The measurements have been done during 16 seconds and the result has been
normalized to 100
| CA15 | CA7 | total |
-------------------------------------
default | 100 | 40 | 140 |
pack | <1 | 45 | <46 |
-------------------------------------
The A15 cluster is less power efficient than the A7 cluster but if we assume
that the tasks is well spread on both clusters, we can guest estimate that the
power consumption on a dual cluster of CA7 would have been for a default
kernel:
| CA7 | CA7 | total |
-------------------------------------
default | 40 | 40 | 80 |
-------------------------------------
Vincent Guittot (14):
Revert "sched: Introduce temporary FAIR_GROUP_SCHED dependency for
load-tracking"
sched: add a new SD_SHARE_POWERDOMAIN flag for sched_domain
sched: pack small tasks
sched: pack the idle load balance
ARM: sched: clear SD_SHARE_POWERDOMAIN
sched: add a knob to choose the packing level
sched: agressively pack at wake/fork/exec
sched: trig ILB on an idle buddy
sched: evaluate the activity level of the system
sched: update the buddy CPU
sched: filter task pull request
sched: create a new field with available capacity
sched: update the cpu_power
sched: force migration on buddy CPU
arch/arm/kernel/topology.c | 9 +
arch/ia64/include/asm/topology.h | 1 +
arch/tile/include/asm/topology.h | 1 +
include/linux/sched.h | 11 +-
include/linux/sched/sysctl.h | 8 +
include/linux/topology.h | 4 +
kernel/sched/core.c | 14 +-
kernel/sched/fair.c | 393 +++++++++++++++++++++++++++++++++++---
kernel/sched/sched.h | 15 +-
kernel/sysctl.c | 13 ++
10 files changed, 423 insertions(+), 46 deletions(-)
--
1.7.9.5