On a devfreq PROBE_DEFER, the freq_table in the driver profile struct is never reset and may be left in an undefined state.
This comes from the fact that we store the freq_table in the driver profile struct, which is commonly defined as static and is not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we find it is already defined, since a driver may declare its own freq_table.
This logic is flawed in the case where the devfreq core generates a freq_table, sets it in the profile struct and then hits PROBE_DEFER, freeing the freq_table. In this case devfreq will find a non-NULL freq_table that has already been freed, skip the freq_table generation and probe the driver based on the wrong table.
To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if the driver provides one.
Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
Cc: stable@vger.kernel.org
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
drivers/devfreq/governor_passive.c | 14 +++---
include/linux/devfreq.h | 5 +++
3 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 01474daf4548..2e2b3b414d67 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) { - unsigned long *freq_table = devfreq->profile->freq_table; + unsigned long *freq_table = devfreq->freq_table; s32 qos_min_freq, qos_max_freq;
lockdep_assert_held(&devfreq->lock); @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */ - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) { + if (freq_table[0] < freq_table[devfreq->max_state - 1]) { *min_freq = freq_table[0]; - *max_freq = freq_table[devfreq->profile->max_state - 1]; + *max_freq = freq_table[devfreq->max_state - 1]; } else { - *min_freq = freq_table[devfreq->profile->max_state - 1]; + *min_freq = freq_table[devfreq->max_state - 1]; *max_freq = freq_table[0]; }
@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev;
- for (lev = 0; lev < devfreq->profile->max_state; lev++) - if (freq == devfreq->profile->freq_table[lev]) + for (lev = 0; lev < devfreq->max_state; lev++) + if (freq == devfreq->freq_table[lev]) return lev;
return -EINVAL; @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
static int set_freq_table(struct devfreq *devfreq) { - struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count; @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL;
- profile->max_state = count; - profile->freq_table = devm_kcalloc(devfreq->dev.parent, - profile->max_state, - sizeof(*profile->freq_table), - GFP_KERNEL); - if (!profile->freq_table) { - profile->max_state = 0; + devfreq->max_state = count; + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent, + devfreq->max_state, + sizeof(*devfreq->freq_table), + GFP_KERNEL); + if (!devfreq->freq_table) return -ENOMEM; - }
- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) { + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); if (IS_ERR(opp)) { - devm_kfree(devfreq->dev.parent, profile->freq_table); - profile->max_state = 0; + devm_kfree(devfreq->dev.parent, devfreq->freq_table); return PTR_ERR(opp); } dev_pm_opp_put(opp); - profile->freq_table[i] = freq; + devfreq->freq_table[i] = freq; }
return 0; @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
if (lev != prev_lev) { devfreq->stats.trans_table[ - (prev_lev * devfreq->profile->max_state) + lev]++; + (prev_lev * devfreq->max_state) + lev]++; devfreq->stats.total_trans++; }
@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock); + } else { + devfreq->freq_table = devfreq->profile->freq_table; + devfreq->max_state = devfreq->profile->max_state; }
devfreq->scaling_min_freq = find_available_min_freq(devfreq); @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int), - devfreq->profile->max_state, - devfreq->profile->max_state), + devfreq->max_state, + devfreq->max_state), GFP_KERNEL); if (!devfreq->stats.trans_table) { mutex_unlock(&devfreq->lock); @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, }
devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev, - devfreq->profile->max_state, + devfreq->max_state, sizeof(*devfreq->stats.time_in_state), GFP_KERNEL); if (!devfreq->stats.time_in_state) { @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
mutex_lock(&df->lock);
- for (i = 0; i < df->profile->max_state; i++) + for (i = 0; i < df->max_state; i++) count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), - "%lu ", df->profile->freq_table[i]); + "%lu ", df->freq_table[i]);
mutex_unlock(&df->lock); /* Truncate the trailing space */ @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
if (!df->profile) return -EINVAL; - max_state = df->profile->max_state; + max_state = df->max_state;
if (max_state == 0) return sprintf(buf, "Not Supported.\n"); @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu", - df->profile->freq_table[i]); + df->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n");
for (i = 0; i < max_state; i++) { - if (df->profile->freq_table[i] - == df->previous_freq) { + if (df->freq_table[i] == df->previous_freq) len += sprintf(buf + len, "*"); - } else { + else len += sprintf(buf + len, " "); - } - len += sprintf(buf + len, "%10lu:", - df->profile->freq_table[i]); + + len += sprintf(buf + len, "%10lu:", df->freq_table[i]); for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", df->stats.trans_table[(i * max_state) + j]); @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL;
- if (df->profile->max_state == 0) + if (df->max_state == 0) return count;
err = kstrtoint(buf, 10, &value); @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL;
mutex_lock(&df->lock); - memset(df->stats.time_in_state, 0, (df->profile->max_state * + memset(df->stats.time_in_state, 0, (df->max_state * sizeof(*df->stats.time_in_state))); memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int), - df->profile->max_state, - df->profile->max_state)); + df->max_state, + df->max_state)); df->stats.total_trans = 0; df->stats.last_update = get_jiffies_64(); mutex_unlock(&df->lock); diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 72c67979ebe1..ce24a262aa16 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out;
/* Use interpolation if required opps is not available */ - for (i = 0; i < parent_devfreq->profile->max_state; i++) - if (parent_devfreq->profile->freq_table[i] == *freq) + for (i = 0; i < parent_devfreq->max_state; i++) + if (parent_devfreq->freq_table[i] == *freq) break;
- if (i == parent_devfreq->profile->max_state) + if (i == parent_devfreq->max_state) return -EINVAL;
- if (i < devfreq->profile->max_state) { - child_freq = devfreq->profile->freq_table[i]; + if (i < devfreq->max_state) { + child_freq = devfreq->freq_table[i]; } else { - count = devfreq->profile->max_state; - child_freq = devfreq->profile->freq_table[count - 1]; + count = devfreq->max_state; + child_freq = devfreq->freq_table[count - 1]; }
out: diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats { * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. * @work: delayed work for load monitoring. + * @freq_table: current frequency table used by the devfreq driver. + * @max_state: count of entry present in the frequency table. * @previous_freq: previously configured frequency value. * @last_status: devfreq user device info, performance statistics * @data: Private data of the governor. The devfreq framework does not @@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work;
+ unsigned long *freq_table; + unsigned int max_state; + unsigned long previous_freq; struct devfreq_dev_status last_status;
On 22. 6. 20. 07:03, Christian Marangi wrote:
On a devfreq PROBE_DEFER, the freq_table in the driver profile struct is never reset and may be left in an undefined state.
This comes from the fact that we store the freq_table in the driver profile struct, which is commonly defined as static and is not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we find it is already defined, since a driver may declare its own freq_table.
This logic is flawed in the case where the devfreq core generates a freq_table, sets it in the profile struct and then hits PROBE_DEFER, freeing the freq_table. In this case devfreq will find a non-NULL freq_table that has already been freed, skip the freq_table generation and probe the driver based on the wrong table.
To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if the driver provides one.
Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
Cc: stable@vger.kernel.org
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
drivers/devfreq/devfreq.c | 71 ++++++++++++++---------------- drivers/devfreq/governor_passive.c | 14 +++--- include/linux/devfreq.h | 5 +++ 3 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 01474daf4548..2e2b3b414d67 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) {
-       unsigned long *freq_table = devfreq->profile->freq_table;
+       unsigned long *freq_table = devfreq->freq_table;
        s32 qos_min_freq, qos_max_freq;
lockdep_assert_held(&devfreq->lock); @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */
-       if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
+       if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
                *min_freq = freq_table[0];
-               *max_freq = freq_table[devfreq->profile->max_state - 1];
+               *max_freq = freq_table[devfreq->max_state - 1];
        } else {
-               *min_freq = freq_table[devfreq->profile->max_state - 1];
+               *min_freq = freq_table[devfreq->max_state - 1];
                *max_freq = freq_table[0];
        }
@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev;
-       for (lev = 0; lev < devfreq->profile->max_state; lev++)
-               if (freq == devfreq->profile->freq_table[lev])
+       for (lev = 0; lev < devfreq->max_state; lev++)
+               if (freq == devfreq->freq_table[lev])
                        return lev;
return -EINVAL; @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) static int set_freq_table(struct devfreq *devfreq) {
- struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count;
@@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL;
-       profile->max_state = count;
-       profile->freq_table = devm_kcalloc(devfreq->dev.parent,
-                                       profile->max_state,
-                                       sizeof(*profile->freq_table),
-                                       GFP_KERNEL);
-       if (!profile->freq_table) {
-               profile->max_state = 0;
+       devfreq->max_state = count;
+       devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
+                                       devfreq->max_state,
+                                       sizeof(*devfreq->freq_table),
+                                       GFP_KERNEL);
+       if (!devfreq->freq_table)
                return -ENOMEM;
-       }

-       for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
+       for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
                opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
                if (IS_ERR(opp)) {
-                       devm_kfree(devfreq->dev.parent, profile->freq_table);
-                       profile->max_state = 0;
+                       devm_kfree(devfreq->dev.parent, devfreq->freq_table);
                        return PTR_ERR(opp);
                }
                dev_pm_opp_put(opp);
-               profile->freq_table[i] = freq;
+               devfreq->freq_table[i] = freq;
        }
return 0; @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) if (lev != prev_lev) { devfreq->stats.trans_table[
-                       (prev_lev * devfreq->profile->max_state) + lev]++;
+                       (prev_lev * devfreq->max_state) + lev]++;
                devfreq->stats.total_trans++;
        }
@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock);
+       } else {
+               devfreq->freq_table = devfreq->profile->freq_table;
+               devfreq->max_state = devfreq->profile->max_state;
        }
devfreq->scaling_min_freq = find_available_min_freq(devfreq); @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int),
-                       devfreq->profile->max_state,
-                       devfreq->profile->max_state),
+                       devfreq->max_state,
+                       devfreq->max_state),
                        GFP_KERNEL);
        if (!devfreq->stats.trans_table) {
                mutex_unlock(&devfreq->lock);
@@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
-                       devfreq->profile->max_state,
+                       devfreq->max_state,
                        sizeof(*devfreq->stats.time_in_state),
                        GFP_KERNEL);
        if (!devfreq->stats.time_in_state) {
@@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d, mutex_lock(&df->lock);
-       for (i = 0; i < df->profile->max_state; i++)
+       for (i = 0; i < df->max_state; i++)
                count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
-                               "%lu ", df->profile->freq_table[i]);
+                               "%lu ", df->freq_table[i]);
mutex_unlock(&df->lock); /* Truncate the trailing space */ @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev, if (!df->profile) return -EINVAL;
-       max_state = df->profile->max_state;
+       max_state = df->max_state;
if (max_state == 0) return sprintf(buf, "Not Supported.\n"); @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu",
-                               df->profile->freq_table[i]);
+                               df->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) {
-               if (df->profile->freq_table[i]
-                                       == df->previous_freq) {
+               if (df->freq_table[i] == df->previous_freq)
                        len += sprintf(buf + len, "*");
-               } else {
+               else
                        len += sprintf(buf + len, " ");
-               }
-               len += sprintf(buf + len, "%10lu:",
-                               df->profile->freq_table[i]);
+
+               len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
                for (j = 0; j < max_state; j++)
                        len += sprintf(buf + len, "%10u",
                                df->stats.trans_table[(i * max_state) + j]);
@@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL;
-       if (df->profile->max_state == 0)
+       if (df->max_state == 0)
                return count;
err = kstrtoint(buf, 10, &value); @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL; mutex_lock(&df->lock);
-       memset(df->stats.time_in_state, 0, (df->profile->max_state *
+       memset(df->stats.time_in_state, 0, (df->max_state *
                                        sizeof(*df->stats.time_in_state)));
        memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
-                                       df->profile->max_state,
-                                       df->profile->max_state));
+                                       df->max_state,
+                                       df->max_state));
        df->stats.total_trans = 0;
        df->stats.last_update = get_jiffies_64();
        mutex_unlock(&df->lock);
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 72c67979ebe1..ce24a262aa16 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out; /* Use interpolation if required opps is not available */
-       for (i = 0; i < parent_devfreq->profile->max_state; i++)
-               if (parent_devfreq->profile->freq_table[i] == *freq)
+       for (i = 0; i < parent_devfreq->max_state; i++)
+               if (parent_devfreq->freq_table[i] == *freq)
                        break;

-       if (i == parent_devfreq->profile->max_state)
+       if (i == parent_devfreq->max_state)
                return -EINVAL;

-       if (i < devfreq->profile->max_state) {
-               child_freq = devfreq->profile->freq_table[i];
+       if (i < devfreq->max_state) {
+               child_freq = devfreq->freq_table[i];
        } else {
-               count = devfreq->profile->max_state;
-               child_freq = devfreq->profile->freq_table[count - 1];
+               count = devfreq->max_state;
+               child_freq = devfreq->freq_table[count - 1];
        }
out: diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats {
  *             reevaluate operable frequencies. Devfreq users may use
  *             devfreq.nb to the corresponding register notifier call chain.
  * @work:      delayed work for load monitoring.
+ * @freq_table:        current frequency table used by the devfreq driver.
+ * @max_state: count of entry present in the frequency table.
  * @previous_freq:     previously configured frequency value.
  * @last_status:       devfreq user device info, performance statistics
  * @data:      Private data of the governor. The devfreq framework does not
@@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work;
+       unsigned long *freq_table;
+       unsigned int max_state;
+
        unsigned long previous_freq;
        struct devfreq_dev_status last_status;
Applied it. Thanks.
Hi All,
On 20.06.2022 00:03, Christian Marangi wrote:
On a devfreq PROBE_DEFER, the freq_table in the driver profile struct is never reset and may be left in an undefined state.
This comes from the fact that we store the freq_table in the driver profile struct, which is commonly defined as static and is not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we find it is already defined, since a driver may declare its own freq_table.
This logic is flawed in the case where the devfreq core generates a freq_table, sets it in the profile struct and then hits PROBE_DEFER, freeing the freq_table. In this case devfreq will find a non-NULL freq_table that has already been freed, skip the freq_table generation and probe the driver based on the wrong table.
To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if the driver provides one.
Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
Cc: stable@vger.kernel.org
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / devfreq: Rework freq_table to be local to devfreq struct"). Unfortunately it causes the following regression on my Exynos based test systems:
8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312 Hardware name: Samsung Exynos (Flattened Device Tree) Workqueue: events_unbound deferred_probe_work_func PC is at exynos_bus_probe+0x604/0x684 LR is at device_add+0x14c/0x908 pc : [<c090aef4>] lr : [<c06cf77c>] psr: 80000053 ... Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval)) Stack: (0xf0a15d30 to 0xf0a16000) ... exynos_bus_probe from platform_probe+0x5c/0xb8 platform_probe from really_probe+0xe0/0x414 really_probe from __driver_probe_device+0xa0/0x208 __driver_probe_device from driver_probe_device+0x30/0xc0 driver_probe_device from __device_attach_driver+0xa4/0x11c __device_attach_driver from bus_for_each_drv+0x7c/0xc0 bus_for_each_drv from __device_attach+0xac/0x20c __device_attach from bus_probe_device+0x88/0x90 bus_probe_device from deferred_probe_work_func+0x98/0xe0 deferred_probe_work_func from process_one_work+0x288/0x774 process_one_work from worker_thread+0x44/0x504 worker_thread from kthread+0xf4/0x128 kthread from ret_from_fork+0x14/0x2c Exception stack(0xf0a15fb0 to 0xf0a15ff8) ... ---[ end trace 0000000000000000 ]---
This issue is caused by bus->devfreq->profile->freq_table being NULL here:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/dri...
drivers/devfreq/devfreq.c | 71 ++++++++++++++---------------- drivers/devfreq/governor_passive.c | 14 +++--- include/linux/devfreq.h | 5 +++ 3 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 01474daf4548..2e2b3b414d67 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) {
- unsigned long *freq_table = devfreq->profile->freq_table;
- unsigned long *freq_table = devfreq->freq_table; s32 qos_min_freq, qos_max_freq;
lockdep_assert_held(&devfreq->lock); @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */
- if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
- if (freq_table[0] < freq_table[devfreq->max_state - 1]) { *min_freq = freq_table[0];
*max_freq = freq_table[devfreq->profile->max_state - 1];
} else {*max_freq = freq_table[devfreq->max_state - 1];
*min_freq = freq_table[devfreq->profile->max_state - 1];
*max_freq = freq_table[0]; }*min_freq = freq_table[devfreq->max_state - 1];
@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev;
- for (lev = 0; lev < devfreq->profile->max_state; lev++)
if (freq == devfreq->profile->freq_table[lev])
- for (lev = 0; lev < devfreq->max_state; lev++)
if (freq == devfreq->freq_table[lev]) return lev;
return -EINVAL; @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) static int set_freq_table(struct devfreq *devfreq) {
- struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count;
@@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL;
- profile->max_state = count;
- profile->freq_table = devm_kcalloc(devfreq->dev.parent,
profile->max_state,
sizeof(*profile->freq_table),
GFP_KERNEL);
- if (!profile->freq_table) {
profile->max_state = 0;
- devfreq->max_state = count;
- devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
devfreq->max_state,
sizeof(*devfreq->freq_table),
GFP_KERNEL);
- if (!devfreq->freq_table) return -ENOMEM;
- }
- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
- for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); if (IS_ERR(opp)) {
devm_kfree(devfreq->dev.parent, profile->freq_table);
profile->max_state = 0;
} dev_pm_opp_put(opp);devm_kfree(devfreq->dev.parent, devfreq->freq_table); return PTR_ERR(opp);
profile->freq_table[i] = freq;
}devfreq->freq_table[i] = freq;
return 0; @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) if (lev != prev_lev) { devfreq->stats.trans_table[
(prev_lev * devfreq->profile->max_state) + lev]++;
devfreq->stats.total_trans++; }(prev_lev * devfreq->max_state) + lev]++;
@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock);
- } else {
devfreq->freq_table = devfreq->profile->freq_table;
}devfreq->max_state = devfreq->profile->max_state;
devfreq->scaling_min_freq = find_available_min_freq(devfreq); @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int),
devfreq->profile->max_state,
devfreq->profile->max_state),
devfreq->max_state,
if (!devfreq->stats.trans_table) { mutex_unlock(&devfreq->lock);devfreq->max_state), GFP_KERNEL);
@@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
devfreq->profile->max_state,
if (!devfreq->stats.time_in_state) {devfreq->max_state, sizeof(*devfreq->stats.time_in_state), GFP_KERNEL);
@@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d, mutex_lock(&df->lock);
- for (i = 0; i < df->profile->max_state; i++)
- for (i = 0; i < df->max_state; i++) count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
"%lu ", df->profile->freq_table[i]);
"%lu ", df->freq_table[i]);
mutex_unlock(&df->lock); /* Truncate the trailing space */ @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev, if (!df->profile) return -EINVAL;
- max_state = df->profile->max_state;
- max_state = df->max_state;
if (max_state == 0) return sprintf(buf, "Not Supported.\n"); @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu",
df->profile->freq_table[i]);
df->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) {
if (df->profile->freq_table[i]
== df->previous_freq) {
if (df->freq_table[i] == df->previous_freq) len += sprintf(buf + len, "*");
} else {
else len += sprintf(buf + len, " ");
}
len += sprintf(buf + len, "%10lu:",
df->profile->freq_table[i]);
for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", df->stats.trans_table[(i * max_state) + j]);len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
@@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL;
- if (df->profile->max_state == 0)
- if (df->max_state == 0) return count;
err = kstrtoint(buf, 10, &value); @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL; mutex_lock(&df->lock);
- memset(df->stats.time_in_state, 0, (df->profile->max_state *
- memset(df->stats.time_in_state, 0, (df->max_state * sizeof(*df->stats.time_in_state))); memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
df->profile->max_state,
df->profile->max_state));
df->max_state,
df->stats.total_trans = 0; df->stats.last_update = get_jiffies_64(); mutex_unlock(&df->lock);df->max_state));
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 72c67979ebe1..ce24a262aa16 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out; /* Use interpolation if required opps is not available */
- for (i = 0; i < parent_devfreq->profile->max_state; i++)
if (parent_devfreq->profile->freq_table[i] == *freq)
- for (i = 0; i < parent_devfreq->max_state; i++)
if (parent_devfreq->freq_table[i] == *freq) break;
- if (i == parent_devfreq->profile->max_state)
- if (i == parent_devfreq->max_state) return -EINVAL;
- if (i < devfreq->profile->max_state) {
child_freq = devfreq->profile->freq_table[i];
- if (i < devfreq->max_state) {
} else {child_freq = devfreq->freq_table[i];
count = devfreq->profile->max_state;
child_freq = devfreq->profile->freq_table[count - 1];
count = devfreq->max_state;
}child_freq = devfreq->freq_table[count - 1];
out: diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats {
reevaluate operable frequencies. Devfreq users may use
devfreq.nb to the corresponding register notifier call chain.
- @work: delayed work for load monitoring.
- @freq_table: current frequency table used by the devfreq driver.
- @max_state: count of entry present in the frequency table.
- @previous_freq: previously configured frequency value.
- @last_status: devfreq user device info, performance statistics
- @data: Private data of the governor. The devfreq framework does not
@@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work;
- unsigned long *freq_table;
- unsigned int max_state;
- unsigned long previous_freq; struct devfreq_dev_status last_status;
Best regards
On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
Hi All,
On 20.06.2022 00:03, Christian Marangi wrote:
On a devfreq PROBE_DEFER, the freq_table in the driver profile struct is never reset and may be left in an undefined state.
This comes from the fact that we store the freq_table in the driver profile struct, which is commonly defined as static and is not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we find it is already defined, since a driver may declare its own freq_table.
This logic is flawed in the case where the devfreq core generates a freq_table, sets it in the profile struct and then hits PROBE_DEFER, freeing the freq_table. In this case devfreq will find a non-NULL freq_table that has already been freed, skip the freq_table generation and probe the driver based on the wrong table.
To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if the driver provides one.
Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
Cc: stable@vger.kernel.org
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / devfreq: Rework freq_table to be local to devfreq struct"). Unfortunately it causes the following regression on my Exynos based test systems:
8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312 Hardware name: Samsung Exynos (Flattened Device Tree) Workqueue: events_unbound deferred_probe_work_func PC is at exynos_bus_probe+0x604/0x684 LR is at device_add+0x14c/0x908 pc : [<c090aef4>] lr : [<c06cf77c>] psr: 80000053 ... Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval)) Stack: (0xf0a15d30 to 0xf0a16000) ... exynos_bus_probe from platform_probe+0x5c/0xb8 platform_probe from really_probe+0xe0/0x414 really_probe from __driver_probe_device+0xa0/0x208 __driver_probe_device from driver_probe_device+0x30/0xc0 driver_probe_device from __device_attach_driver+0xa4/0x11c __device_attach_driver from bus_for_each_drv+0x7c/0xc0 bus_for_each_drv from __device_attach+0xac/0x20c __device_attach from bus_probe_device+0x88/0x90 bus_probe_device from deferred_probe_work_func+0x98/0xe0 deferred_probe_work_func from process_one_work+0x288/0x774 process_one_work from worker_thread+0x44/0x504 worker_thread from kthread+0xf4/0x128 kthread from ret_from_fork+0x14/0x2c Exception stack(0xf0a15fb0 to 0xf0a15ff8) ... ---[ end trace 0000000000000000 ]---
This issue is caused by bus->devfreq->profile->freq_table being NULL here:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/dri...
I just checked this and the bug is caused by a simple pr_info...
Can you test the following patch just to make sure?
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index b5615e667e31..79725bbb4bb0 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev) } }
- max_state = bus->devfreq->profile->max_state; - min_freq = (bus->devfreq->profile->freq_table[0] / 1000); - max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000); + max_state = bus->devfreq->max_state; + min_freq = (bus->devfreq->freq_table[0] / 1000); + max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000); pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n", dev_name(dev), min_freq, max_freq);
drivers/devfreq/devfreq.c | 71 ++++++++++++++---------------- drivers/devfreq/governor_passive.c | 14 +++--- include/linux/devfreq.h | 5 +++ 3 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 01474daf4548..2e2b3b414d67 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) {
- unsigned long *freq_table = devfreq->profile->freq_table;
- unsigned long *freq_table = devfreq->freq_table; s32 qos_min_freq, qos_max_freq;
lockdep_assert_held(&devfreq->lock); @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */
- if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
- if (freq_table[0] < freq_table[devfreq->max_state - 1]) { *min_freq = freq_table[0];
*max_freq = freq_table[devfreq->profile->max_state - 1];
} else {*max_freq = freq_table[devfreq->max_state - 1];
*min_freq = freq_table[devfreq->profile->max_state - 1];
*max_freq = freq_table[0]; }*min_freq = freq_table[devfreq->max_state - 1];
@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev;
- for (lev = 0; lev < devfreq->profile->max_state; lev++)
if (freq == devfreq->profile->freq_table[lev])
- for (lev = 0; lev < devfreq->max_state; lev++)
if (freq == devfreq->freq_table[lev]) return lev;
return -EINVAL; @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) static int set_freq_table(struct devfreq *devfreq) {
- struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count;
@@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL;
- profile->max_state = count;
- profile->freq_table = devm_kcalloc(devfreq->dev.parent,
profile->max_state,
sizeof(*profile->freq_table),
GFP_KERNEL);
- if (!profile->freq_table) {
profile->max_state = 0;
- devfreq->max_state = count;
- devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
devfreq->max_state,
sizeof(*devfreq->freq_table),
GFP_KERNEL);
- if (!devfreq->freq_table) return -ENOMEM;
- }
- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
- for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); if (IS_ERR(opp)) {
devm_kfree(devfreq->dev.parent, profile->freq_table);
profile->max_state = 0;
} dev_pm_opp_put(opp);devm_kfree(devfreq->dev.parent, devfreq->freq_table); return PTR_ERR(opp);
profile->freq_table[i] = freq;
}devfreq->freq_table[i] = freq;
return 0; @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) if (lev != prev_lev) { devfreq->stats.trans_table[
(prev_lev * devfreq->profile->max_state) + lev]++;
devfreq->stats.total_trans++; }(prev_lev * devfreq->max_state) + lev]++;
@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock);
- } else {
devfreq->freq_table = devfreq->profile->freq_table;
}devfreq->max_state = devfreq->profile->max_state;
devfreq->scaling_min_freq = find_available_min_freq(devfreq); @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int),
devfreq->profile->max_state,
devfreq->profile->max_state),
devfreq->max_state,
if (!devfreq->stats.trans_table) { mutex_unlock(&devfreq->lock);devfreq->max_state), GFP_KERNEL);
@@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
devfreq->profile->max_state,
if (!devfreq->stats.time_in_state) {devfreq->max_state, sizeof(*devfreq->stats.time_in_state), GFP_KERNEL);
@@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d, mutex_lock(&df->lock);
- for (i = 0; i < df->profile->max_state; i++)
- for (i = 0; i < df->max_state; i++) count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
"%lu ", df->profile->freq_table[i]);
"%lu ", df->freq_table[i]);
mutex_unlock(&df->lock); /* Truncate the trailing space */ @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev, if (!df->profile) return -EINVAL;
- max_state = df->profile->max_state;
- max_state = df->max_state;
if (max_state == 0) return sprintf(buf, "Not Supported.\n"); @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu",
df->profile->freq_table[i]);
df->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) {
if (df->profile->freq_table[i]
== df->previous_freq) {
if (df->freq_table[i] == df->previous_freq) len += sprintf(buf + len, "*");
} else {
else len += sprintf(buf + len, " ");
}
len += sprintf(buf + len, "%10lu:",
df->profile->freq_table[i]);
for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", df->stats.trans_table[(i * max_state) + j]);len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
@@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL;
- if (df->profile->max_state == 0)
- if (df->max_state == 0) return count;
err = kstrtoint(buf, 10, &value); @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL; mutex_lock(&df->lock);
- memset(df->stats.time_in_state, 0, (df->profile->max_state *
- memset(df->stats.time_in_state, 0, (df->max_state * sizeof(*df->stats.time_in_state))); memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
df->profile->max_state,
df->profile->max_state));
df->max_state,
df->stats.total_trans = 0; df->stats.last_update = get_jiffies_64(); mutex_unlock(&df->lock);df->max_state));
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 72c67979ebe1..ce24a262aa16 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out; /* Use interpolation if required opps is not available */
- for (i = 0; i < parent_devfreq->profile->max_state; i++)
if (parent_devfreq->profile->freq_table[i] == *freq)
- for (i = 0; i < parent_devfreq->max_state; i++)
if (parent_devfreq->freq_table[i] == *freq) break;
- if (i == parent_devfreq->profile->max_state)
- if (i == parent_devfreq->max_state) return -EINVAL;
- if (i < devfreq->profile->max_state) {
child_freq = devfreq->profile->freq_table[i];
- if (i < devfreq->max_state) {
} else {child_freq = devfreq->freq_table[i];
count = devfreq->profile->max_state;
child_freq = devfreq->profile->freq_table[count - 1];
count = devfreq->max_state;
}child_freq = devfreq->freq_table[count - 1];
out: diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats {
reevaluate operable frequencies. Devfreq users may use
devfreq.nb to the corresponding register notifier call chain.
- @work: delayed work for load monitoring.
- @freq_table: current frequency table used by the devfreq driver.
- @max_state: count of entry present in the frequency table.
- @previous_freq: previously configured frequency value.
- @last_status: devfreq user device info, performance statistics
- @data: Private data of the governor. The devfreq framework does not
@@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work;
- unsigned long *freq_table;
- unsigned int max_state;
- unsigned long previous_freq; struct devfreq_dev_status last_status;
Best regards
Marek Szyprowski, PhD Samsung R&D Institute Poland
On Fri, Jul 01, 2022 at 01:28:50PM +0200, Christian Marangi wrote:
On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
Hi All,
On 20.06.2022 00:03, Christian Marangi wrote:
On a devfreq PROBE_DEFER, the freq_table in the driver profile struct is never reset and may be left in an undefined state.
This comes from the fact that we store the freq_table in the driver profile struct, which is commonly defined as static and is not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we find it's already defined, since a driver may declare its own freq_table.
This logic is flawed in the case where the devfreq core generates a freq_table, sets it in the profile struct and then hits PROBE_DEFER, freeing the freq_table. In this case devfreq will find a non-NULL freq_table that has been freed, skip the freq_table generation and probe the driver based on the wrong table.
To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if the driver provides one.
Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device") Cc: stable@vger.kernel.org Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / devfreq: Rework freq_table to be local to devfreq struct"). Unfortunately it causes the following regression on my Exynos based test systems:
8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312 Hardware name: Samsung Exynos (Flattened Device Tree) Workqueue: events_unbound deferred_probe_work_func PC is at exynos_bus_probe+0x604/0x684 LR is at device_add+0x14c/0x908 pc : [<c090aef4>] lr : [<c06cf77c>] psr: 80000053 ... Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval)) Stack: (0xf0a15d30 to 0xf0a16000) ... exynos_bus_probe from platform_probe+0x5c/0xb8 platform_probe from really_probe+0xe0/0x414 really_probe from __driver_probe_device+0xa0/0x208 __driver_probe_device from driver_probe_device+0x30/0xc0 driver_probe_device from __device_attach_driver+0xa4/0x11c __device_attach_driver from bus_for_each_drv+0x7c/0xc0 bus_for_each_drv from __device_attach+0xac/0x20c __device_attach from bus_probe_device+0x88/0x90 bus_probe_device from deferred_probe_work_func+0x98/0xe0 deferred_probe_work_func from process_one_work+0x288/0x774 process_one_work from worker_thread+0x44/0x504 worker_thread from kthread+0xf4/0x128 kthread from ret_from_fork+0x14/0x2c Exception stack(0xf0a15fb0 to 0xf0a15ff8) ... ---[ end trace 0000000000000000 ]---
This issue is caused by bus->devfreq->profile->freq_table being NULL here:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/dri...
I just checked this and the bug is caused by a simple pr_info...
Can you test the following patch just to make sure?
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index b5615e667e31..79725bbb4bb0 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev) } }
max_state = bus->devfreq->profile->max_state;
min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
max_state = bus->devfreq->max_state;
min_freq = (bus->devfreq->freq_table[0] / 1000);
max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000); pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n", dev_name(dev), min_freq, max_freq);
(BTW patch is ready, just waiting for your test and I will send it)
drivers/devfreq/devfreq.c | 71 ++++++++++++++---------------- drivers/devfreq/governor_passive.c | 14 +++--- include/linux/devfreq.h | 5 +++ 3 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 01474daf4548..2e2b3b414d67 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) {
- unsigned long *freq_table = devfreq->profile->freq_table;
- unsigned long *freq_table = devfreq->freq_table; s32 qos_min_freq, qos_max_freq;
lockdep_assert_held(&devfreq->lock); @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */
- if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
- if (freq_table[0] < freq_table[devfreq->max_state - 1]) { *min_freq = freq_table[0];
*max_freq = freq_table[devfreq->profile->max_state - 1];
} else {*max_freq = freq_table[devfreq->max_state - 1];
*min_freq = freq_table[devfreq->profile->max_state - 1];
*max_freq = freq_table[0]; }*min_freq = freq_table[devfreq->max_state - 1];
@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev;
- for (lev = 0; lev < devfreq->profile->max_state; lev++)
if (freq == devfreq->profile->freq_table[lev])
- for (lev = 0; lev < devfreq->max_state; lev++)
if (freq == devfreq->freq_table[lev]) return lev;
return -EINVAL; @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) static int set_freq_table(struct devfreq *devfreq) {
- struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count;
@@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL;
- profile->max_state = count;
- profile->freq_table = devm_kcalloc(devfreq->dev.parent,
profile->max_state,
sizeof(*profile->freq_table),
GFP_KERNEL);
- if (!profile->freq_table) {
profile->max_state = 0;
- devfreq->max_state = count;
- devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
devfreq->max_state,
sizeof(*devfreq->freq_table),
GFP_KERNEL);
- if (!devfreq->freq_table) return -ENOMEM;
- }
- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
- for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); if (IS_ERR(opp)) {
devm_kfree(devfreq->dev.parent, profile->freq_table);
profile->max_state = 0;
} dev_pm_opp_put(opp);devm_kfree(devfreq->dev.parent, devfreq->freq_table); return PTR_ERR(opp);
profile->freq_table[i] = freq;
}devfreq->freq_table[i] = freq;
return 0; @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) if (lev != prev_lev) { devfreq->stats.trans_table[
(prev_lev * devfreq->profile->max_state) + lev]++;
devfreq->stats.total_trans++; }(prev_lev * devfreq->max_state) + lev]++;
@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock);
- } else {
devfreq->freq_table = devfreq->profile->freq_table;
}devfreq->max_state = devfreq->profile->max_state;
devfreq->scaling_min_freq = find_available_min_freq(devfreq); @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int),
devfreq->profile->max_state,
devfreq->profile->max_state),
devfreq->max_state,
if (!devfreq->stats.trans_table) { mutex_unlock(&devfreq->lock);devfreq->max_state), GFP_KERNEL);
@@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
devfreq->profile->max_state,
if (!devfreq->stats.time_in_state) {devfreq->max_state, sizeof(*devfreq->stats.time_in_state), GFP_KERNEL);
@@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d, mutex_lock(&df->lock);
- for (i = 0; i < df->profile->max_state; i++)
- for (i = 0; i < df->max_state; i++) count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
"%lu ", df->profile->freq_table[i]);
"%lu ", df->freq_table[i]);
mutex_unlock(&df->lock); /* Truncate the trailing space */ @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev, if (!df->profile) return -EINVAL;
- max_state = df->profile->max_state;
- max_state = df->max_state;
if (max_state == 0) return sprintf(buf, "Not Supported.\n"); @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu",
df->profile->freq_table[i]);
df->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) {
if (df->profile->freq_table[i]
== df->previous_freq) {
if (df->freq_table[i] == df->previous_freq) len += sprintf(buf + len, "*");
} else {
else len += sprintf(buf + len, " ");
}
len += sprintf(buf + len, "%10lu:",
df->profile->freq_table[i]);
for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", df->stats.trans_table[(i * max_state) + j]);len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
@@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL;
- if (df->profile->max_state == 0)
- if (df->max_state == 0) return count;
err = kstrtoint(buf, 10, &value); @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL; mutex_lock(&df->lock);
- memset(df->stats.time_in_state, 0, (df->profile->max_state *
- memset(df->stats.time_in_state, 0, (df->max_state * sizeof(*df->stats.time_in_state))); memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
df->profile->max_state,
df->profile->max_state));
df->max_state,
df->stats.total_trans = 0; df->stats.last_update = get_jiffies_64(); mutex_unlock(&df->lock);df->max_state));
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 72c67979ebe1..ce24a262aa16 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out; /* Use interpolation if required opps is not available */
- for (i = 0; i < parent_devfreq->profile->max_state; i++)
if (parent_devfreq->profile->freq_table[i] == *freq)
- for (i = 0; i < parent_devfreq->max_state; i++)
if (parent_devfreq->freq_table[i] == *freq) break;
- if (i == parent_devfreq->profile->max_state)
- if (i == parent_devfreq->max_state) return -EINVAL;
- if (i < devfreq->profile->max_state) {
child_freq = devfreq->profile->freq_table[i];
- if (i < devfreq->max_state) {
} else {child_freq = devfreq->freq_table[i];
count = devfreq->profile->max_state;
child_freq = devfreq->profile->freq_table[count - 1];
count = devfreq->max_state;
}child_freq = devfreq->freq_table[count - 1];
out: diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats {
reevaluate operable frequencies. Devfreq users may use
devfreq.nb to the corresponding register notifier call chain.
- @work: delayed work for load monitoring.
- @freq_table: current frequency table used by the devfreq driver.
- @max_state: count of entry present in the frequency table.
- @previous_freq: previously configured frequency value.
- @last_status: devfreq user device info, performance statistics
- @data: Private data of the governor. The devfreq framework does not
@@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work;
- unsigned long *freq_table;
- unsigned int max_state;
- unsigned long previous_freq; struct devfreq_dev_status last_status;
Best regards
Marek Szyprowski, PhD Samsung R&D Institute Poland
-- Ansuel
On 01.07.2022 13:28, Christian Marangi wrote:
On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
On 20.06.2022 00:03, Christian Marangi wrote:
On a devfreq PROBE_DEFER, the freq_table in the driver profile struct is never reset and may be left in an undefined state.
This comes from the fact that we store the freq_table in the driver profile struct, which is commonly defined as static and is not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we find it's already defined, since a driver may declare its own freq_table.
This logic is flawed in the case where the devfreq core generates a freq_table, sets it in the profile struct and then hits PROBE_DEFER, freeing the freq_table. In this case devfreq will find a non-NULL freq_table that has been freed, skip the freq_table generation and probe the driver based on the wrong table.
To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if the driver provides one.
Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device") Cc: stable@vger.kernel.org Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / devfreq: Rework freq_table to be local to devfreq struct"). Unfortunately it causes the following regression on my Exynos based test systems:
8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312 Hardware name: Samsung Exynos (Flattened Device Tree) Workqueue: events_unbound deferred_probe_work_func PC is at exynos_bus_probe+0x604/0x684 LR is at device_add+0x14c/0x908 pc : [<c090aef4>] lr : [<c06cf77c>] psr: 80000053 ... Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval)) Stack: (0xf0a15d30 to 0xf0a16000) ... exynos_bus_probe from platform_probe+0x5c/0xb8 platform_probe from really_probe+0xe0/0x414 really_probe from __driver_probe_device+0xa0/0x208 __driver_probe_device from driver_probe_device+0x30/0xc0 driver_probe_device from __device_attach_driver+0xa4/0x11c __device_attach_driver from bus_for_each_drv+0x7c/0xc0 bus_for_each_drv from __device_attach+0xac/0x20c __device_attach from bus_probe_device+0x88/0x90 bus_probe_device from deferred_probe_work_func+0x98/0xe0 deferred_probe_work_func from process_one_work+0x288/0x774 process_one_work from worker_thread+0x44/0x504 worker_thread from kthread+0xf4/0x128 kthread from ret_from_fork+0x14/0x2c Exception stack(0xf0a15fb0 to 0xf0a15ff8) ... ---[ end trace 0000000000000000 ]---
This issue is caused by bus->devfreq->profile->freq_table being NULL here:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/dri...
I just checked this and the bug is caused by a simple pr_info...
Can you test the following patch just to make sure?
Yes, this fixes the issue. Thanks! Feel free to add:
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index b5615e667e31..79725bbb4bb0 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev) } }
max_state = bus->devfreq->profile->max_state;
min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
max_state = bus->devfreq->max_state;
min_freq = (bus->devfreq->freq_table[0] / 1000);
max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000); pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n", dev_name(dev), min_freq, max_freq);
drivers/devfreq/devfreq.c | 71 ++++++++++++++---------------- drivers/devfreq/governor_passive.c | 14 +++--- include/linux/devfreq.h | 5 +++ 3 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 01474daf4548..2e2b3b414d67 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) {
- unsigned long *freq_table = devfreq->profile->freq_table;
- unsigned long *freq_table = devfreq->freq_table; s32 qos_min_freq, qos_max_freq; lockdep_assert_held(&devfreq->lock);
@@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */
- if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
- if (freq_table[0] < freq_table[devfreq->max_state - 1]) { *min_freq = freq_table[0];
*max_freq = freq_table[devfreq->profile->max_state - 1];
} else {*max_freq = freq_table[devfreq->max_state - 1];
*min_freq = freq_table[devfreq->profile->max_state - 1];
}*min_freq = freq_table[devfreq->max_state - 1]; *max_freq = freq_table[0];
@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev;
- for (lev = 0; lev < devfreq->profile->max_state; lev++)
if (freq == devfreq->profile->freq_table[lev])
- for (lev = 0; lev < devfreq->max_state; lev++)
return -EINVAL;if (freq == devfreq->freq_table[lev]) return lev;
@@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) static int set_freq_table(struct devfreq *devfreq) {
- struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count;
@@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL;
- profile->max_state = count;
- profile->freq_table = devm_kcalloc(devfreq->dev.parent,
profile->max_state,
sizeof(*profile->freq_table),
GFP_KERNEL);
- if (!profile->freq_table) {
profile->max_state = 0;
- devfreq->max_state = count;
- devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
devfreq->max_state,
sizeof(*devfreq->freq_table),
GFP_KERNEL);
- if (!devfreq->freq_table) return -ENOMEM;
- }
- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
- for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); if (IS_ERR(opp)) {
devm_kfree(devfreq->dev.parent, profile->freq_table);
profile->max_state = 0;
devm_kfree(devfreq->dev.parent, devfreq->freq_table); return PTR_ERR(opp); } dev_pm_opp_put(opp);
profile->freq_table[i] = freq;
} return 0;devfreq->freq_table[i] = freq;
@@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) if (lev != prev_lev) { devfreq->stats.trans_table[
(prev_lev * devfreq->profile->max_state) + lev]++;
}(prev_lev * devfreq->max_state) + lev]++; devfreq->stats.total_trans++;
@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock);
- } else {
devfreq->freq_table = devfreq->profile->freq_table;
} devfreq->scaling_min_freq = find_available_min_freq(devfreq);devfreq->max_state = devfreq->profile->max_state;
@@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int),
devfreq->profile->max_state,
devfreq->profile->max_state),
devfreq->max_state,
if (!devfreq->stats.trans_table) { mutex_unlock(&devfreq->lock);devfreq->max_state), GFP_KERNEL);
@@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
devfreq->profile->max_state,
if (!devfreq->stats.time_in_state) {devfreq->max_state, sizeof(*devfreq->stats.time_in_state), GFP_KERNEL);
@@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d, mutex_lock(&df->lock);
- for (i = 0; i < df->profile->max_state; i++)
- for (i = 0; i < df->max_state; i++) count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
"%lu ", df->profile->freq_table[i]);
mutex_unlock(&df->lock); /* Truncate the trailing space */"%lu ", df->freq_table[i]);
@@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev, if (!df->profile) return -EINVAL;
- max_state = df->profile->max_state;
- max_state = df->max_state; if (max_state == 0) return sprintf(buf, "Not Supported.\n");
@@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu",
df->profile->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) {df->freq_table[i]);
if (df->profile->freq_table[i]
== df->previous_freq) {
if (df->freq_table[i] == df->previous_freq) len += sprintf(buf + len, "*");
} else {
else len += sprintf(buf + len, " ");
}
len += sprintf(buf + len, "%10lu:",
df->profile->freq_table[i]);
len += sprintf(buf + len, "%10lu:", df->freq_table[i]); for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", df->stats.trans_table[(i * max_state) + j]);
@@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL;
- if (df->profile->max_state == 0)
- if (df->max_state == 0) return count; err = kstrtoint(buf, 10, &value);
@@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL; mutex_lock(&df->lock);
- memset(df->stats.time_in_state, 0, (df->profile->max_state *
- memset(df->stats.time_in_state, 0, (df->max_state * sizeof(*df->stats.time_in_state))); memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
df->profile->max_state,
df->profile->max_state));
df->max_state,
df->stats.total_trans = 0; df->stats.last_update = get_jiffies_64(); mutex_unlock(&df->lock);df->max_state));
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 72c67979ebe1..ce24a262aa16 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out; /* Use interpolation if required opps is not available */
- for (i = 0; i < parent_devfreq->profile->max_state; i++)
if (parent_devfreq->profile->freq_table[i] == *freq)
- for (i = 0; i < parent_devfreq->max_state; i++)
if (parent_devfreq->freq_table[i] == *freq) break;
- if (i == parent_devfreq->profile->max_state)
- if (i == parent_devfreq->max_state) return -EINVAL;
- if (i < devfreq->profile->max_state) {
child_freq = devfreq->profile->freq_table[i];
- if (i < devfreq->max_state) {
} else {child_freq = devfreq->freq_table[i];
count = devfreq->profile->max_state;
child_freq = devfreq->profile->freq_table[count - 1];
count = devfreq->max_state;
} out:child_freq = devfreq->freq_table[count - 1];
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats { * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. * @work: delayed work for load monitoring.
- @freq_table: current frequency table used by the devfreq driver.
- @max_state: count of entry present in the frequency table.
- @previous_freq: previously configured frequency value.
- @last_status: devfreq user device info, performance statistics
- @data: Private data of the governor. The devfreq framework does not
@@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work;
- unsigned long *freq_table;
- unsigned int max_state;
- unsigned long previous_freq; struct devfreq_dev_status last_status;
Best regards
Marek Szyprowski, PhD Samsung R&D Institute Poland
Best regards
linux-stable-mirror@lists.linaro.org