The powerclamp cooling device cur_state shows actual idle observed by
package C-state idle counters. But the implementation is not sufficient
for multi package or multi die system. The cur_state value is incorrect.
On these systems, these counters must be read from each package/die and
somehow aggregate them. But there is no good method for aggregation.
It was not a problem when explicit CPU model addition was required to
enable intel powerclamp. In this way certain CPU models could have
been avoided. But with the removal of CPU model check with the
availability of Package C-state counters, the driver is loaded on most
of the recent systems.
For multi package/die systems, just show the actual target idle state,
the system is trying to achieve. In powerclamp this is the user set
state minus one.
Also there is no use of starting a worker thread for polling package
C-state counters and applying any compensation for multiple package
or multiple die systems.
Fixes: b721ca0d1927 ("thermal/powerclamp: remove cpu whitelist")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # 4.14+
---
v2:
Changed: (true == clamping) to (clamping)
Updated commit description for the last paragraph
drivers/thermal/intel/intel_powerclamp.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index b80e25ec1261..2f4cbfdf26a0 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -57,6 +57,7 @@
static unsigned int target_mwait;
static struct dentry *debug_dir;
+static bool poll_pkg_cstate_enable;
/* user selected target */
static unsigned int set_target_ratio;
@@ -261,6 +262,9 @@ static unsigned int get_compensation(int ratio)
{
unsigned int comp = 0;
+ if (!poll_pkg_cstate_enable)
+ return 0;
+
/* we only use compensation if all adjacent ones are good */
if (ratio == 1 &&
cal_data[ratio].confidence >= CONFIDENCE_OK &&
@@ -519,7 +523,8 @@ static int start_power_clamp(void)
control_cpu = cpumask_first(cpu_online_mask);
clamping = true;
- schedule_delayed_work(&poll_pkg_cstate_work, 0);
+ if (poll_pkg_cstate_enable)
+ schedule_delayed_work(&poll_pkg_cstate_work, 0);
/* start one kthread worker per online cpu */
for_each_online_cpu(cpu) {
@@ -585,11 +590,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
- if (true == clamping)
- *state = pkg_cstate_ratio_cur;
- else
+ if (clamping) {
+ if (poll_pkg_cstate_enable)
+ *state = pkg_cstate_ratio_cur;
+ else
+ *state = set_target_ratio;
+ } else {
/* to save power, do not poll idle ratio while not clamping */
*state = -1; /* indicates invalid state */
+ }
return 0;
}
@@ -712,6 +721,9 @@ static int __init powerclamp_init(void)
goto exit_unregister;
}
+ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ poll_pkg_cstate_enable = true;
+
cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
&powerclamp_cooling_ops);
if (IS_ERR(cooling_dev)) {
--
2.39.1
From: Yu Kuai <yukuai3(a)huawei.com>
If cfqq is merged with another cfqq(assume that no cfqq is merged to
this cfqq) and then user thread is moved to another cgroup, then
check_blkcg_changed() will set cic->cfqq[1] to NULL. However, this cfqq is
still merged and no one will referece this cfqq, which will cause
following kmemleak:
comm "cgexec", pid 2541110, jiffies 4317578508 (age 5728.064s)
hex dump (first 32 bytes):
03 00 00 00 90 03 00 00 80 8d 0f be 20 80 ff ff ............ ...
a0 16 c6 a1 21 80 ff ff 00 00 00 00 00 00 00 00 ....!...........
backtrace:
[<0000000071d2775e>] kmem_cache_alloc_node+0x13c/0x660
[<00000000e827e6fd>] cfq_get_queue+0x318/0x6f0
[<00000000945249ee>] cfq_set_request+0x724/0xe38
[<00000000af64d5a9>] elv_set_request+0x84/0xd0
[<0000000047344f0d>] __get_request+0x970/0xf90
[<00000000d016bd51>] get_request+0x278/0x970
[<0000000052512d36>] blk_queue_bio+0x278/0xcc0
[<0000000085282101>] generic_make_request+0x3c0/0x778
[<0000000089f69c24>] submit_bio+0xdc/0x408
[<00000000748509ae>] ext4_mpage_readpages+0xc84/0x13b8 [ext4]
[<0000000078bfc705>] ext4_readpages+0x80/0xc0 [ext4]
[<00000000317f2ed7>] read_pages+0xd0/0x610
[<00000000e7ab01d2>] __do_page_cache_readahead+0x36c/0x430
[<0000000071df2285>] ondemand_readahead+0x24c/0x6b0
[<00000000124a824a>] page_cache_sync_readahead+0x188/0x448
[<00000000bca561ae>] generic_file_buffered_read+0x648/0x2b58
unreferenced object 0xffffa028411d4720 (size 240):
Fix the problem by put cooperator in check_blkcg_changed(), so that old
cfqq can be freed.
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
---
block/cfq-iosched.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 2eb87444b157..88bae553ece3 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3778,6 +3778,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
if (cfqq) {
cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
cic_set_cfqq(cic, NULL, true);
+ cfq_put_cooperator(cfqq);
cfq_put_queue(cfqq);
}
--
2.31.1
The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.
Switch to using the new API where the provider is not registered until
after it has been fully initialised.
Fixes: d5ef16ba5fbe ("memory: tegra20: Support interconnect framework")
Cc: stable(a)vger.kernel.org # 5.11
Cc: Dmitry Osipenko <digetx(a)gmail.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/memory/tegra/tegra30-emc.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/memory/tegra/tegra30-emc.c b/drivers/memory/tegra/tegra30-emc.c
index 77706e9bc543..c91e9b7e2e01 100644
--- a/drivers/memory/tegra/tegra30-emc.c
+++ b/drivers/memory/tegra/tegra30-emc.c
@@ -1533,15 +1533,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
emc->provider.aggregate = soc->icc_ops->aggregate;
emc->provider.xlate_extended = emc_of_icc_xlate_extended;
- err = icc_provider_add(&emc->provider);
- if (err)
- goto err_msg;
+ icc_provider_init(&emc->provider);
/* create External Memory Controller node */
node = icc_node_create(TEGRA_ICC_EMC);
if (IS_ERR(node)) {
err = PTR_ERR(node);
- goto del_provider;
+ goto err_msg;
}
node->name = "External Memory Controller";
@@ -1562,12 +1560,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
node->name = "External Memory (DRAM)";
icc_node_add(node, &emc->provider);
+ err = icc_provider_register(&emc->provider);
+ if (err)
+ goto remove_nodes;
+
return 0;
remove_nodes:
icc_nodes_remove(&emc->provider);
-del_provider:
- icc_provider_del(&emc->provider);
err_msg:
dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
--
2.39.1
The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.
Switch to using the new API where the provider is not registered until
after it has been fully initialised.
Fixes: d5ef16ba5fbe ("memory: tegra20: Support interconnect framework")
Cc: stable(a)vger.kernel.org # 5.11
Cc: Dmitry Osipenko <digetx(a)gmail.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/memory/tegra/tegra20-emc.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
index bd4e37b6552d..fd595c851a27 100644
--- a/drivers/memory/tegra/tegra20-emc.c
+++ b/drivers/memory/tegra/tegra20-emc.c
@@ -1021,15 +1021,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
emc->provider.aggregate = soc->icc_ops->aggregate;
emc->provider.xlate_extended = emc_of_icc_xlate_extended;
- err = icc_provider_add(&emc->provider);
- if (err)
- goto err_msg;
+ icc_provider_init(&emc->provider);
/* create External Memory Controller node */
node = icc_node_create(TEGRA_ICC_EMC);
if (IS_ERR(node)) {
err = PTR_ERR(node);
- goto del_provider;
+ goto err_msg;
}
node->name = "External Memory Controller";
@@ -1050,12 +1048,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
node->name = "External Memory (DRAM)";
icc_node_add(node, &emc->provider);
+ err = icc_provider_register(&emc->provider);
+ if (err)
+ goto remove_nodes;
+
return 0;
remove_nodes:
icc_nodes_remove(&emc->provider);
-del_provider:
- icc_provider_del(&emc->provider);
err_msg:
dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
--
2.39.1
The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.
Switch to using the new API where the provider is not registered until
after it has been fully initialised.
Fixes: 380def2d4cf2 ("memory: tegra124: Support interconnect framework")
Cc: stable(a)vger.kernel.org # 5.12
Cc: Dmitry Osipenko <digetx(a)gmail.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/memory/tegra/tegra124-emc.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/memory/tegra/tegra124-emc.c b/drivers/memory/tegra/tegra124-emc.c
index 85bc936c02f9..00ed2b6a0d1b 100644
--- a/drivers/memory/tegra/tegra124-emc.c
+++ b/drivers/memory/tegra/tegra124-emc.c
@@ -1351,15 +1351,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
emc->provider.aggregate = soc->icc_ops->aggregate;
emc->provider.xlate_extended = emc_of_icc_xlate_extended;
- err = icc_provider_add(&emc->provider);
- if (err)
- goto err_msg;
+ icc_provider_init(&emc->provider);
/* create External Memory Controller node */
node = icc_node_create(TEGRA_ICC_EMC);
if (IS_ERR(node)) {
err = PTR_ERR(node);
- goto del_provider;
+ goto err_msg;
}
node->name = "External Memory Controller";
@@ -1380,12 +1378,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
node->name = "External Memory (DRAM)";
icc_node_add(node, &emc->provider);
+ err = icc_provider_register(&emc->provider);
+ if (err)
+ goto remove_nodes;
+
return 0;
remove_nodes:
icc_nodes_remove(&emc->provider);
-del_provider:
- icc_provider_del(&emc->provider);
err_msg:
dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
--
2.39.1