From: Peter Zijlstra <peterz@infradead.org>
[ Upstream commit 048661a1f963e9517630f080687d48af79ed784c ]
Yanfei reported that setting HANDOFF should not depend on recomputing @first, only on @first state. Which would then give:
	if (ww_ctx || !first)
		first = __mutex_waiter_is_first(lock, &waiter);
	if (first)
		__mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
But because 'ww_ctx || !first' is basically 'always' and the test for first is relatively cheap, omit that first branch entirely.
Reported-by: Yanfei Xu <yanfei.xu@windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Yanfei Xu <yanfei.xu@windriver.com>
Link: https://lore.kernel.org/r/20210630154114.896786297@infradead.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/locking/mutex.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d2df5e68b503..fb30e1436dfb 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -928,7 +928,6 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 		    struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
 {
 	struct mutex_waiter waiter;
-	bool first = false;
 	struct ww_mutex *ww;
 	int ret;
 
@@ -1007,6 +1006,8 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 
 	set_current_state(state);
 	for (;;) {
+		bool first;
+
 		/*
 		 * Once we hold wait_lock, we're serialized against
 		 * mutex_unlock() handing the lock off to us, do a trylock
@@ -1035,15 +1036,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 		spin_unlock(&lock->wait_lock);
 		schedule_preempt_disabled();
 
-		/*
-		 * ww_mutex needs to always recheck its position since its waiter
-		 * list is not FIFO ordered.
-		 */
-		if (ww_ctx || !first) {
-			first = __mutex_waiter_is_first(lock, &waiter);
-			if (first)
-				__mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
-		}
+		first = __mutex_waiter_is_first(lock, &waiter);
+		if (first)
+			__mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
 
 		set_current_state(state);
 		/*
From: Jeongtae Park <jeongtae.park@gmail.com>
[ Upstream commit 1852f5ed358147095297a09cc3c6f160208a676d ]
This patch fixes the offset of register error log by using regmap_get_offset().
Signed-off-by: Jeongtae Park <jeongtae.park@gmail.com>
Link: https://lore.kernel.org/r/20210701142630.44936-1-jeongtae.park@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/base/regmap/regmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index fe3e38dd5324..2fc826e97591 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1667,7 +1667,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
 			if (ret) {
 				dev_err(map->dev,
 					"Error in caching of register: %x ret: %d\n",
-					reg + i, ret);
+					reg + regmap_get_offset(map, i), ret);
 				return ret;
 			}
 		}
From: Dmitry Osipenko <digetx@gmail.com>
[ Upstream commit e301df76472cc929fa62d923bc3892931f7ad71d ]
The TPS65910 regulator now gets a deferred probe until the supply regulator is registered. Silence the noisy error message about the deferred probe.
Reported-by: Matt Merhar <mattmerhar@protonmail.com> # Ouya T30
Tested-by: Matt Merhar <mattmerhar@protonmail.com> # Ouya T30
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Link: https://lore.kernel.org/r/20210705201211.16082-1-digetx@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/regulator/tps65910-regulator.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 1d5b0a1b86f7..06cbe60c990f 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -1211,12 +1211,10 @@ static int tps65910_probe(struct platform_device *pdev)
 
 		rdev = devm_regulator_register(&pdev->dev, &pmic->desc[i],
 					       &config);
-		if (IS_ERR(rdev)) {
-			dev_err(tps65910->dev,
-				"failed to register %s regulator\n",
-				pdev->name);
-			return PTR_ERR(rdev);
-		}
+		if (IS_ERR(rdev))
+			return dev_err_probe(tps65910->dev, PTR_ERR(rdev),
+					     "failed to register %s regulator\n",
+					     pdev->name);
 
 		/* Save regulator for cleanup */
 		pmic->rdev[i] = rdev;
From: Sean Anderson <sean.anderson@seco.com>
[ Upstream commit df6313d707e575a679ada3313358289af24454c0 ]
After calling dma_map_single(), we must also call dma_mapping_error(). This fixes the following warning when compiling with CONFIG_DMA_API_DEBUG:
[  311.241478] WARNING: CPU: 0 PID: 428 at kernel/dma/debug.c:1027 check_unmap+0x79c/0x96c
[  311.249547] DMA-API: mxs-dcp 2280000.crypto: device driver failed to check map error[device address=0x00000000860cb080] [size=32 bytes] [mapped as single]
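As an illustration (not part of the patch), the pattern the fix enforces is that every dma_map_single() is paired with a dma_mapping_error() check before the handle is used; a minimal sketch with generic, hypothetical names:

	#include <linux/dma-mapping.h>

	/* buf must be DMA-able memory, e.g. from kmalloc() */
	static int example_map(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

		/* Must be checked before handing addr to the hardware. */
		if (dma_mapping_error(dev, addr))
			return -ENOMEM;

		/* ... program the device with addr ... */

		dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
		return 0;
	}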
Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/crypto/mxs-dcp.c | 45 +++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index d6a7784d2988..f397cc5bf102 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -170,15 +170,19 @@ static struct dcp *global_sdcp;
 
 static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
 {
+	int dma_err;
 	struct dcp *sdcp = global_sdcp;
 	const int chan = actx->chan;
 	uint32_t stat;
 	unsigned long ret;
 	struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
-
 	dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
 					      DMA_TO_DEVICE);
 
+	dma_err = dma_mapping_error(sdcp->dev, desc_phys);
+	if (dma_err)
+		return dma_err;
+
 	reinit_completion(&sdcp->completion[chan]);
 
 	/* Clear status register. */
@@ -216,18 +220,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
 static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
 			   struct skcipher_request *req, int init)
 {
+	dma_addr_t key_phys, src_phys, dst_phys;
 	struct dcp *sdcp = global_sdcp;
 	struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
 	struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	int ret;
 
-	dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
-					     2 * AES_KEYSIZE_128,
-					     DMA_TO_DEVICE);
-	dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
-					     DCP_BUF_SZ, DMA_TO_DEVICE);
-	dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
-					     DCP_BUF_SZ, DMA_FROM_DEVICE);
+	key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
+				  2 * AES_KEYSIZE_128, DMA_TO_DEVICE);
+	ret = dma_mapping_error(sdcp->dev, key_phys);
+	if (ret)
+		return ret;
+
+	src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
+				  DCP_BUF_SZ, DMA_TO_DEVICE);
+	ret = dma_mapping_error(sdcp->dev, src_phys);
+	if (ret)
+		goto err_src;
+
+	dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
+				  DCP_BUF_SZ, DMA_FROM_DEVICE);
+	ret = dma_mapping_error(sdcp->dev, dst_phys);
+	if (ret)
+		goto err_dst;
 
 	if (actx->fill % AES_BLOCK_SIZE) {
 		dev_err(sdcp->dev, "Invalid block size!\n");
@@ -265,10 +280,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
 	ret = mxs_dcp_start_dma(actx);
 
 aes_done_run:
+	dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
+err_dst:
+	dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
+err_src:
 	dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
 			 DMA_TO_DEVICE);
-	dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
-	dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
 
 	return ret;
 }
@@ -557,6 +574,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
 	dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf,
 					     DCP_BUF_SZ, DMA_TO_DEVICE);
 
+	ret = dma_mapping_error(sdcp->dev, buf_phys);
+	if (ret)
+		return ret;
+
 	/* Fill in the DMA descriptor. */
 	desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
 			 MXS_DCP_CONTROL0_INTERRUPT |
@@ -589,6 +610,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
 	if (rctx->fini) {
 		digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf,
 					     DCP_SHA_PAY_SZ, DMA_FROM_DEVICE);
+		ret = dma_mapping_error(sdcp->dev, digest_phys);
+		if (ret)
+			goto done_run;
+
 		desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
 		desc->payload = digest_phys;
 	}
From: Quentin Perret <qperret@google.com>
[ Upstream commit f95091536f78971b269ec321b057b8d630b0ad8a ]
It is possible for sched_getattr() to incorrectly report the state of the reset_on_fork flag when called on a deadline task.
Indeed, if the flag was set on a deadline task using sched_setattr() with flags (SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_KEEP_PARAMS), then p->sched_reset_on_fork will be set, but __setscheduler() will bail out early, which means that the dl_se->flags will not get updated by __setscheduler_params()->__setparam_dl(). Consequently, if sched_getattr() is then called on the task, __getparam_dl() will override kattr.sched_flags with the now out-of-date copy in dl_se->flags and report the stale value to userspace.
To fix this, make sure to only copy the flags that are relevant to sched_deadline to and from the dl_se->flags field.
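As an illustration (not part of the patch), a minimal userspace sketch of the scenario above, modelled on the sched_setattr(2) man page example; the local struct definition and flag values are assumptions taken from the uapi headers, and SCHED_DEADLINE requires root:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/types.h>

	struct sched_attr {
		__u32 size, sched_policy;
		__u64 sched_flags;
		__s32 sched_nice;
		__u32 sched_priority;
		__u64 sched_runtime, sched_deadline, sched_period;
	};

	#define SCHED_DEADLINE			6
	#define SCHED_FLAG_RESET_ON_FORK	0x01
	#define SCHED_FLAG_KEEP_PARAMS		0x10

	int main(void)
	{
		struct sched_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.sched_policy = SCHED_DEADLINE;
		attr.sched_runtime  =  10 * 1000 * 1000;	/*  10 ms */
		attr.sched_deadline = 100 * 1000 * 1000;	/* 100 ms */
		attr.sched_period   = 100 * 1000 * 1000;	/* 100 ms */
		if (syscall(SYS_sched_setattr, 0, &attr, 0))
			return 1;

		/* Set reset_on_fork while keeping the DL parameters. */
		attr.sched_flags = SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_KEEP_PARAMS;
		if (syscall(SYS_sched_setattr, 0, &attr, 0))
			return 1;

		/* Before the fix, the flag read back here could be stale. */
		if (syscall(SYS_sched_getattr, 0, &attr, sizeof(attr), 0))
			return 1;
		printf("reset_on_fork: %d\n",
		       !!(attr.sched_flags & SCHED_FLAG_RESET_ON_FORK));
		return 0;
	}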
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210727101103.2729607-2-qperret@google.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/sched/deadline.c | 7 ++++---
 kernel/sched/sched.h    | 2 ++
 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index aaacd6cfd42f..5cafc642e647 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2741,7 +2741,7 @@ void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_runtime = attr->sched_runtime;
 	dl_se->dl_deadline = attr->sched_deadline;
 	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
-	dl_se->flags = attr->sched_flags;
+	dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
 	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
 }
@@ -2754,7 +2754,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
 	attr->sched_runtime = dl_se->dl_runtime;
 	attr->sched_deadline = dl_se->dl_deadline;
 	attr->sched_period = dl_se->dl_period;
-	attr->sched_flags = dl_se->flags;
+	attr->sched_flags &= ~SCHED_DL_FLAGS;
+	attr->sched_flags |= dl_se->flags;
 }
 
 /*
@@ -2851,7 +2852,7 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
 	if (dl_se->dl_runtime != attr->sched_runtime ||
 	    dl_se->dl_deadline != attr->sched_deadline ||
 	    dl_se->dl_period != attr->sched_period ||
-	    dl_se->flags != attr->sched_flags)
+	    dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
 		return true;
 
 	return false;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ddefb0419d7a..d53d19770866 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -227,6 +227,8 @@ static inline void update_avg(u64 *avg, u64 sample)
  */
 #define SCHED_FLAG_SUGOV	0x10000000
 
+#define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
+
 static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
 {
 #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
From: Hans de Goede <hdegoede@redhat.com>
[ Upstream commit caa534c3ba40c6e8352b42cbbbca9ba481814ac8 ]
When fuel_gauge_reg_readb()/_writeb() fails, report which register we were trying to read / write when the error happened.
Also reword the message a bit:
- Drop the axp288 prefix, dev_err() already prints this
- Switch from telegram / abbreviated style to a normal sentence, aligning
  the message with those from fuel_gauge_read_*bit_word()
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/power/supply/axp288_fuel_gauge.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c
index 2ba2d8d6b8e6..d1bcc52e67c3 100644
--- a/drivers/power/supply/axp288_fuel_gauge.c
+++ b/drivers/power/supply/axp288_fuel_gauge.c
@@ -147,7 +147,7 @@ static int fuel_gauge_reg_readb(struct axp288_fg_info *info, int reg)
 	}
 
 	if (ret < 0) {
-		dev_err(&info->pdev->dev, "axp288 reg read err:%d\n", ret);
+		dev_err(&info->pdev->dev, "Error reading reg 0x%02x err: %d\n", reg, ret);
 		return ret;
 	}
 
@@ -161,7 +161,7 @@ static int fuel_gauge_reg_writeb(struct axp288_fg_info *info, int reg, u8 val)
 	ret = regmap_write(info->regmap, reg, (unsigned int)val);
 
 	if (ret < 0)
-		dev_err(&info->pdev->dev, "axp288 reg write err:%d\n", ret);
+		dev_err(&info->pdev->dev, "Error writing reg 0x%02x err: %d\n", reg, ret);
 
 	return ret;
 }
From: Tony Lindgren <tony@atomide.com>
[ Upstream commit fe28140b3393b0ba1eb95cc109f974a7e58b26fd ]
We should not clear FLAGS_DMA_ACTIVE before omap_sham_update_dma_stop() is done calling dma_unmap_sg(). We already clear FLAGS_DMA_ACTIVE at the end of omap_sham_update_dma_stop().
The early clearing of FLAGS_DMA_ACTIVE is not causing issues as we do not need to defer anything based on FLAGS_DMA_ACTIVE currently. So this can be applied as clean-up.
Cc: Lokesh Vutla <lokeshvutla@ti.com>
Cc: Tero Kristo <kristo@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/crypto/omap-sham.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index dd53ad9987b0..a47ac60a4ee1 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1736,7 +1736,7 @@ static void omap_sham_done_task(unsigned long data)
 		if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags))
 			goto finish;
 	} else if (test_bit(FLAGS_DMA_READY, &dd->flags)) {
-		if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
+		if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
 			omap_sham_update_dma_stop(dd);
 			if (dd->err) {
 				err = dd->err;
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
[ Upstream commit b4da13aa28d4fd0071247b7b41c579ee8a86c81a ]
A missing clock update is causing the following warning:
  rq->clock_update_flags < RQCF_ACT_SKIP
  WARNING: CPU: 112 PID: 2041 at kernel/sched/sched.h:1453 sub_running_bw.isra.0+0x190/0x1a0
  ...
  CPU: 112 PID: 2041 Comm: sugov:112 Tainted: G W 5.14.0-rc1 #1
  Hardware name: WIWYNN Mt.Jade Server System B81.030Z1.0007/Mt.Jade Motherboard, BIOS 1.6.20210526 (SCP: 1.06.20210526) 2021/05/26
  ...
  Call trace:
   sub_running_bw.isra.0+0x190/0x1a0
   migrate_task_rq_dl+0xf8/0x1e0
   set_task_cpu+0xa8/0x1f0
   try_to_wake_up+0x150/0x3d4
   wake_up_q+0x64/0xc0
   __up_write+0xd0/0x1c0
   up_write+0x4c/0x2b0
   cppc_set_perf+0x120/0x2d0
   cppc_cpufreq_set_target+0xe0/0x1a4 [cppc_cpufreq]
   __cpufreq_driver_target+0x74/0x140
   sugov_work+0x64/0x80
   kthread_worker_fn+0xe0/0x230
   kthread+0x138/0x140
   ret_from_fork+0x10/0x18
The task causing this is the `cppc_fie` DL task introduced by commit 1eb5dde674f5 ("cpufreq: CPPC: Add support for frequency invariance").
With CONFIG_ACPI_CPPC_CPUFREQ_FIE=y and the schedutil cpufreq governor on a slow-switching system (like on this Ampere Altra WIWYNN Mt. Jade Arm server):

DL task `curr=sugov:112` lets `p=cppc_fie` migrate, and since the latter is in `non_contending` state, migrate_task_rq_dl() calls

  sub_running_bw()->__sub_running_bw()->cpufreq_update_util()->
  rq_clock()->assert_clock_updated()

on p.
Fix this by updating the clock for a non_contending task in migrate_task_rq_dl() before calling sub_running_bw().
Reported-by: Bruno Goncalves <bgoncalv@redhat.com>
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Acked-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/r/20210804135925.3734605-1-dietmar.eggemann@arm.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/sched/deadline.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5cafc642e647..e94314633b39 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1733,6 +1733,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
 	 */
 	raw_spin_rq_lock(rq);
 	if (p->dl.dl_non_contending) {
+		update_rq_clock(rq);
 		sub_running_bw(&p->dl, &rq->dl);
 		p->dl.dl_non_contending = 0;
 		/*
From: Sergey Senozhatsky <senozhatsky@chromium.org>
[ Upstream commit ccfc9dd6914feaa9a81f10f9cce56eb0f7712264 ]
The soft watchdog timer function checks whether a virtual machine was suspended, in which case what looks like a lockup is in fact a false positive.
This is what kvm_check_and_clear_guest_paused() does: it tests guest PVCLOCK_GUEST_STOPPED (which is set by the host) and if it's set then we need to touch all watchdogs and bail out.
Watchdog timer function runs from IRQ, so PVCLOCK_GUEST_STOPPED check works fine.
There is, however, one more watchdog that runs from IRQ and races with the watchdog timer fn, and that watchdog is not aware of PVCLOCK_GUEST_STOPPED: the RCU stall detector.
  apic_timer_interrupt()
   smp_apic_timer_interrupt()
    hrtimer_interrupt()
     __hrtimer_run_queues()
      tick_sched_timer()
       tick_sched_handle()
        update_process_times()
         rcu_sched_clock_irq()
This triggers RCU stalls on our devices during VM resume.
If tick_sched_handle()->rcu_sched_clock_irq() runs on a VCPU before watchdog_timer_fn()->kvm_check_and_clear_guest_paused(), then nothing on this VCPU touches the watchdogs, and RCU reads a stale gp stall timestamp together with the new jiffies value, which makes it think that RCU has stalled.
Make RCU stall watchdog aware of PVCLOCK_GUEST_STOPPED and don't report RCU stalls when we resume the VM.
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/rcu/tree_stall.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 6c76988cc019..3d11155e0033 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -7,6 +7,8 @@
  * Author: Paul E. McKenney <paulmck@linux.ibm.com>
  */
 
+#include <linux/kvm_para.h>
+
 //////////////////////////////////////////////////////////////////////////////
 //
 // Controlling CPU stall warnings, including delay calculation.
@@ -696,6 +698,14 @@ static void check_cpu_stall(struct rcu_data *rdp)
 		   (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
 		   cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
 
+		/*
+		 * If a virtual machine is stopped by the host it can look to
+		 * the watchdog like an RCU stall. Check to see if the host
+		 * stopped the vm.
+		 */
+		if (kvm_check_and_clear_guest_paused())
+			return;
+
 		/* We haven't checked in, so go dump stack. */
 		print_cpu_stall(gps);
 		if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
@@ -705,6 +715,14 @@ static void check_cpu_stall(struct rcu_data *rdp)
 		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
 		   cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
 
+		/*
+		 * If a virtual machine is stopped by the host it can look to
+		 * the watchdog like an RCU stall. Check to see if the host
+		 * stopped the vm.
+		 */
+		if (kvm_check_and_clear_guest_paused())
+			return;
+
 		/* They had a few time units to dump stack, so complain. */
 		print_other_cpu_stall(gs2, gps);
 		if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
[ Upstream commit 767f4b620edadac579c9b8b6660761d4285fa6f9 ]
Hypervisors likely do not expose the SMCA feature to the guest and loading this module leads to false warnings. This module should not be loaded in guests to begin with, but people tend to do so, especially when testing kernels in VMs. And then they complain about those false warnings.
Do the practical thing and do not load this module when running as a guest to avoid all that complaining.
[ bp: Rewrite commit message. ]
Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Tested-by: Kim Phillips <kim.phillips@amd.com>
Link: https://lkml.kernel.org/r/20210628172740.245689-1-Smita.KoralahalliChannabas...
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/edac/mce_amd.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 27d56920b469..67dbf4c31271 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1246,6 +1246,9 @@ static int __init mce_amd_init(void)
 	    c->x86_vendor != X86_VENDOR_HYGON)
 		return -ENODEV;
 
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
 	if (boot_cpu_has(X86_FEATURE_SMCA)) {
 		xec_mask = 0x3f;
 		goto out;
From: Frederic Weisbecker <frederic@kernel.org>
[ Upstream commit 406dd42bd1ba0c01babf9cde169bb319e52f6147 ]
When an itimer deactivates a previously armed expiration, it simply doesn't do anything. As a result the process wide cputime counter keeps running and the tick dependency stays set until it reaches the old ghost expiration value.
This can be reproduced with the following snippet:
	void trigger_process_counter(void)
	{
		struct itimerval n = {};

		n.it_value.tv_sec = 100;
		setitimer(ITIMER_VIRTUAL, &n, NULL);
		n.it_value.tv_sec = 0;
		setitimer(ITIMER_VIRTUAL, &n, NULL);
	}
Fix this by resetting the relevant base expiration. This is similar to disarming a timer.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210726125513.271824-4-frederic@kernel.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/time/posix-cpu-timers.c | 2 --
 1 file changed, 2 deletions(-)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 517be7fd175e..a002685f688d 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1346,8 +1346,6 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
 		}
 	}
 
-	if (!*newval)
-		return;
 	*newval += now;
 }
From: Thomas Gleixner <tglx@linutronix.de>
[ Upstream commit 627ef5ae2df8eeccb20d5af0e4cfa4df9e61ed28 ]
If __hrtimer_start_range_ns() is invoked with an already armed hrtimer then the timer has to be canceled first and then added back. If the timer is the first expiring timer then on removal the clockevent device is reprogrammed to the next expiring timer to avoid that the pending expiry fires needlessly.
If the new expiry time ends up being the first expiry again then the clock event device has to be reprogrammed again.
Avoid this by checking whether the timer is the first to expire and in that case, keep the timer on the current CPU and delay the reprogramming up to the point where the timer has been enqueued again.
Reported-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210713135157.873137732@linutronix.de
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/time/hrtimer.c | 60 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 7 deletions(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 4a66725b1d4a..ba2e0d0a0e5a 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1030,12 +1030,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
  * remove hrtimer, called with base lock held
  */
 static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
+	       bool restart, bool keep_local)
 {
 	u8 state = timer->state;
 
 	if (state & HRTIMER_STATE_ENQUEUED) {
-		int reprogram;
+		bool reprogram;
 
 		/*
 		 * Remove the timer and force reprogramming when high
@@ -1048,8 +1049,16 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 		debug_deactivate(timer);
 		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
 
+		/*
+		 * If the timer is not restarted then reprogramming is
+		 * required if the timer is local. If it is local and about
+		 * to be restarted, avoid programming it twice (on removal
+		 * and a moment later when it's requeued).
+		 */
 		if (!restart)
 			state = HRTIMER_STATE_INACTIVE;
+		else
+			reprogram &= !keep_local;
 
 		__remove_hrtimer(timer, base, state, reprogram);
 		return 1;
@@ -1103,9 +1112,31 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 				    struct hrtimer_clock_base *base)
 {
 	struct hrtimer_clock_base *new_base;
+	bool force_local, first;
 
-	/* Remove an active timer from the queue: */
-	remove_hrtimer(timer, base, true);
+	/*
+	 * If the timer is on the local cpu base and is the first expiring
+	 * timer then this might end up reprogramming the hardware twice
+	 * (on removal and on enqueue). To avoid that by prevent the
+	 * reprogram on removal, keep the timer local to the current CPU
+	 * and enforce reprogramming after it is queued no matter whether
+	 * it is the new first expiring timer again or not.
+	 */
+	force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+	force_local &= base->cpu_base->next_timer == timer;
+
+	/*
+	 * Remove an active timer from the queue. In case it is not queued
+	 * on the current CPU, make sure that remove_hrtimer() updates the
+	 * remote data correctly.
+	 *
+	 * If it's on the current CPU and the first expiring timer, then
+	 * skip reprogramming, keep the timer local and enforce
+	 * reprogramming later if it was the first expiring timer. This
+	 * avoids programming the underlying clock event twice (once at
+	 * removal and once after enqueue).
+	 */
+	remove_hrtimer(timer, base, true, force_local);
 
 	if (mode & HRTIMER_MODE_REL)
 		tim = ktime_add_safe(tim, base->get_time());
@@ -1115,9 +1146,24 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
 	/* Switch the timer base, if necessary: */
-	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
+	if (!force_local) {
+		new_base = switch_hrtimer_base(timer, base,
+					       mode & HRTIMER_MODE_PINNED);
+	} else {
+		new_base = base;
+	}
+
+	first = enqueue_hrtimer(timer, new_base, mode);
+	if (!force_local)
+		return first;
 
-	return enqueue_hrtimer(timer, new_base, mode);
+	/*
+	 * Timer was forced to stay on the current CPU to avoid
+	 * reprogramming on removal and enqueue. Force reprogram the
+	 * hardware by evaluating the new first expiring timer.
+	 */
+	hrtimer_force_reprogram(new_base->cpu_base, 1);
+	return 0;
 }
 
 /**
@@ -1183,7 +1229,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 	base = lock_hrtimer_base(timer, &flags);
 
 	if (!hrtimer_callback_running(timer))
-		ret = remove_hrtimer(timer, base, false);
+		ret = remove_hrtimer(timer, base, false, false);
 
 	unlock_hrtimer_base(timer, &flags);
From: Thomas Gleixner <tglx@linutronix.de>
[ Upstream commit 8c3b5e6ec0fee18bc2ce38d1dfe913413205f908 ]
If high resolution timers are disabled, the timerfd notification about a "clock was set" event does not happen for all cases which use clock_was_set_delayed(), because that's a NOP for HIGHRES=n, which is wrong.

Make clock_was_set_delayed() unconditionally available to fix that.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210713135158.196661266@linutronix.de
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/hrtimer.h     |  5 -----
 kernel/time/hrtimer.c       | 32 ++++++++++++++++----------------
 kernel/time/tick-internal.h |  3 +++
 3 files changed, 19 insertions(+), 21 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bb5e7b0a4274..77295af72426 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -318,16 +318,12 @@ struct clock_event_device;
 
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
-extern void clock_was_set_delayed(void);
-
 extern unsigned int hrtimer_resolution;
 
 #else
 
 #define hrtimer_resolution	(unsigned int)LOW_RES_NSEC
 
-static inline void clock_was_set_delayed(void) { }
-
 #endif
 
 static inline ktime_t
@@ -351,7 +347,6 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
 						    timer->base->get_time());
 }
 
-extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
 #else
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index ba2e0d0a0e5a..5af758473488 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -758,22 +758,6 @@ static void hrtimer_switch_to_hres(void)
 	retrigger_next_event(NULL);
 }
 
-static void clock_was_set_work(struct work_struct *work)
-{
-	clock_was_set();
-}
-
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-
-/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
- */
-void clock_was_set_delayed(void)
-{
-	schedule_work(&hrtimer_work);
-}
-
 #else
 
 static inline int hrtimer_is_hres_enabled(void) { return 0; }
@@ -891,6 +875,22 @@ void clock_was_set(void)
 	timerfd_clock_was_set();
 }
 
+static void clock_was_set_work(struct work_struct *work)
+{
+	clock_was_set();
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping and resume code to reprogram the hrtimer
+ * interrupt device on all cpus and to notify timerfd.
+ */
+void clock_was_set_delayed(void)
+{
+	schedule_work(&hrtimer_work);
+}
+
 /*
  * During resume we might have to reprogram the high resolution timer
  * interrupt on all online CPUs.  However, all other CPUs will be
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 6a742a29e545..cd610faa2523 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -165,3 +165,6 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 
 extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
 void timer_clear_idle(void);
+
+void clock_was_set(void);
+void clock_was_set_delayed(void);
From: Jan Kara <jack@suse.cz>
[ Upstream commit 781d2a9a2fc7d0be53a072794dc03ef6de770f3d ]
We were checking validity of LVID entries only when getting implementation use information from LVID in udf_sb_lvidiu(). However if the LVID is suitably corrupted, it can cause problems also to code such as udf_count_free() which doesn't use udf_sb_lvidiu(). So check validity of LVID already when loading it from the disk and just disable LVID altogether when it is not valid.
Reported-by: syzbot+7fbfe5fed73ebb675748@syzkaller.appspotmail.com
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/udf/super.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 2f83c1204e20..1eeb75a1efd2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -108,16 +108,10 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
 		return NULL;
 	lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
 	partnum = le32_to_cpu(lvid->numOfPartitions);
-	if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
-	     offsetof(struct logicalVolIntegrityDesc, impUse)) /
-	     (2 * sizeof(uint32_t)) < partnum) {
-		udf_err(sb, "Logical volume integrity descriptor corrupted "
-			"(numOfPartitions = %u)!\n", partnum);
-		return NULL;
-	}
 	/* The offset is to skip freeSpaceTable and sizeTable arrays */
 	offset = partnum * 2 * sizeof(uint32_t);
-	return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
+	return (struct logicalVolIntegrityDescImpUse *)
+					(((uint8_t *)(lvid + 1)) + offset);
 }
 
 /* UDF filesystem type */
@@ -1542,6 +1536,7 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct logicalVolIntegrityDesc *lvid;
 	int indirections = 0;
+	u32 parts, impuselen;
 
 	while (++indirections <= UDF_MAX_LVID_NESTING) {
 		final_bh = NULL;
@@ -1568,15 +1563,27 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
 
 		lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data;
 		if (lvid->nextIntegrityExt.extLength == 0)
-			return;
+			goto check;
 
 		loc = leea_to_cpu(lvid->nextIntegrityExt);
 	}
 
 	udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n",
 		UDF_MAX_LVID_NESTING);
+out_err:
 	brelse(sbi->s_lvid_bh);
 	sbi->s_lvid_bh = NULL;
+	return;
+check:
+	parts = le32_to_cpu(lvid->numOfPartitions);
+	impuselen = le32_to_cpu(lvid->lengthOfImpUse);
+	if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize ||
+	    sizeof(struct logicalVolIntegrityDesc) + impuselen +
+	    2 * parts * sizeof(u32) > sb->s_blocksize) {
+		udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), "
+			 "ignoring.\n", parts, impuselen);
+		goto out_err;
+	}
 }
 
 /*
From: Pali Rohár <pali@kernel.org>
[ Upstream commit b645333443712d2613e4e863f81090d5dc509657 ]
Currently the iocharset=utf8 mount option is broken. To use UTF-8 as the iocharset, it is required to use the utf8 mount option.

Fix the iocharset=utf8 mount option to be equivalent to the utf8 mount option.

If UTF-8 is used as the iocharset then s_nls_map is set to NULL. So simplify the surrounding code and remove the UDF_FLAG_NLS_MAP and UDF_FLAG_UTF8 flags: to distinguish between UTF-8 and non-UTF-8, it is enough to check whether s_nls_map is set to NULL or not.
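As an illustration (not part of the patch), after this change the following two invocations behave identically; the device and mount point are placeholders:

	# iocharset=utf8 is now equivalent to the utf8 option
	mount -t udf -o utf8 /dev/sr0 /mnt
	mount -t udf -o iocharset=utf8 /dev/sr0 /mnt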
Link: https://lore.kernel.org/r/20210808162453.1653-4-pali@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/udf/super.c   | 50 ++++++++++++++++++------------------------------
 fs/udf/udf_sb.h  |  2 --
 fs/udf/unicode.c |  4 ++--
 3 files changed, 21 insertions(+), 35 deletions(-)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1eeb75a1efd2..b2d7c57d0688 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -343,10 +343,10 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",lastblock=%u", sbi->s_last_block);
 	if (sbi->s_anchor != 0)
 		seq_printf(seq, ",anchor=%u", sbi->s_anchor);
-	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
-		seq_puts(seq, ",utf8");
-	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map)
+	if (sbi->s_nls_map)
 		seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset);
+	else
+		seq_puts(seq, ",iocharset=utf8");
 
 	return 0;
 }
@@ -552,19 +552,24 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
 			/* Ignored (never implemented properly) */
 			break;
 		case Opt_utf8:
-			uopt->flags |= (1 << UDF_FLAG_UTF8);
+			if (!remount) {
+				unload_nls(uopt->nls_map);
+				uopt->nls_map = NULL;
+			}
 			break;
 		case Opt_iocharset:
 			if (!remount) {
-				if (uopt->nls_map)
-					unload_nls(uopt->nls_map);
-				/*
-				 * load_nls() failure is handled later in
-				 * udf_fill_super() after all options are
-				 * parsed.
-				 */
+				unload_nls(uopt->nls_map);
+				uopt->nls_map = NULL;
+			}
+			/* When nls_map is not loaded then UTF-8 is used */
+			if (!remount && strcmp(args[0].from, "utf8") != 0) {
 				uopt->nls_map = load_nls(args[0].from);
-				uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
+				if (!uopt->nls_map) {
+					pr_err("iocharset %s not found\n",
+						args[0].from);
+					return 0;
+				}
 			}
 			break;
 		case Opt_uforget:
@@ -2146,21 +2151,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	if (!udf_parse_options((char *)options, &uopt, false))
 		goto parse_options_failure;
 
-	if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
-	    uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
-		udf_err(sb, "utf8 cannot be combined with iocharset\n");
-		goto parse_options_failure;
-	}
-	if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) {
-		uopt.nls_map = load_nls_default();
-		if (!uopt.nls_map)
-			uopt.flags &= ~(1 << UDF_FLAG_NLS_MAP);
-		else
-			udf_debug("Using default NLS map\n");
-	}
-	if (!(uopt.flags & (1 << UDF_FLAG_NLS_MAP)))
-		uopt.flags |= (1 << UDF_FLAG_UTF8);
-
 	fileset.logicalBlockNum = 0xFFFFFFFF;
 	fileset.partitionReferenceNum = 0xFFFF;
 
@@ -2315,8 +2305,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 error_out:
 	iput(sbi->s_vat_inode);
 parse_options_failure:
-	if (uopt.nls_map)
-		unload_nls(uopt.nls_map);
+	unload_nls(uopt.nls_map);
 	if (lvid_open)
 		udf_close_lvid(sb);
 	brelse(sbi->s_lvid_bh);
@@ -2366,8 +2355,7 @@ static void udf_put_super(struct super_block *sb)
 	sbi = UDF_SB(sb);
 
 	iput(sbi->s_vat_inode);
-	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
-		unload_nls(sbi->s_nls_map);
+	unload_nls(sbi->s_nls_map);
 	if (!sb_rdonly(sb))
 		udf_close_lvid(sb);
 	brelse(sbi->s_lvid_bh);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 758efe557a19..4fa620543d30 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -20,8 +20,6 @@
 #define UDF_FLAG_UNDELETE		6
 #define UDF_FLAG_UNHIDE			7
 #define UDF_FLAG_VARCONV		8
-#define UDF_FLAG_NLS_MAP		9
-#define UDF_FLAG_UTF8			10
 #define UDF_FLAG_UID_FORGET	11    /* save -1 for uid to disk */
 #define UDF_FLAG_GID_FORGET	12
 #define UDF_FLAG_UID_SET	13
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 5fcfa96463eb..622569007b53 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -177,7 +177,7 @@ static int udf_name_from_CS0(struct super_block *sb,
 		return 0;
 	}
 
-	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+	if (UDF_SB(sb)->s_nls_map)
 		conv_f = UDF_SB(sb)->s_nls_map->uni2char;
 	else
 		conv_f = NULL;
@@ -285,7 +285,7 @@ static int udf_name_to_CS0(struct super_block *sb,
 	if (ocu_max_len <= 0)
 		return 0;
 
-	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+	if (UDF_SB(sb)->s_nls_map)
 		conv_f = UDF_SB(sb)->s_nls_map->char2uni;
 	else
 		conv_f = NULL;
From: Pali Rohár <pali@kernel.org>
[ Upstream commit 28ce50f8d96ec9035f60c9348294ea26b94db944 ]
Currently the iocharset=utf8 mount option is broken. To use UTF-8 as the iocharset, it is required to use the utf8 mount option.

Fix the iocharset=utf8 mount option to be equivalent to the utf8 mount option.

If UTF-8 is used as the iocharset then s_nls_iocharset is set to NULL. So simplify the surrounding code and remove the s_utf8 field: to distinguish between UTF-8 and non-UTF-8, it is enough to check whether s_nls_iocharset is set to NULL or not.
Link: https://lore.kernel.org/r/20210808162453.1653-5-pali@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/isofs/inode.c  | 27 +++++++++++++--------------
 fs/isofs/isofs.h  |  1 -
 fs/isofs/joliet.c |  4 +---
 3 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 21edc423b79f..678e2c51b855 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -155,7 +155,6 @@ struct iso9660_options{
 	unsigned int overriderockperm:1;
 	unsigned int uid_set:1;
 	unsigned int gid_set:1;
-	unsigned int utf8:1;
 	unsigned char map;
 	unsigned char check;
 	unsigned int blocksize;
@@ -356,7 +355,6 @@ static int parse_options(char *options, struct iso9660_options *popt)
 	popt->gid = GLOBAL_ROOT_GID;
 	popt->uid = GLOBAL_ROOT_UID;
 	popt->iocharset = NULL;
-	popt->utf8 = 0;
 	popt->overriderockperm = 0;
 	popt->session=-1;
 	popt->sbsector=-1;
@@ -389,10 +387,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
 		case Opt_cruft:
 			popt->cruft = 1;
 			break;
+#ifdef CONFIG_JOLIET
 		case Opt_utf8:
-			popt->utf8 = 1;
+			kfree(popt->iocharset);
+			popt->iocharset = kstrdup("utf8", GFP_KERNEL);
+			if (!popt->iocharset)
+				return 0;
 			break;
-#ifdef CONFIG_JOLIET
 		case Opt_iocharset:
 			kfree(popt->iocharset);
 			popt->iocharset = match_strdup(&args[0]);
@@ -495,7 +496,6 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
 	if (sbi->s_nocompress)		seq_puts(m, ",nocompress");
 	if (sbi->s_overriderockperm)	seq_puts(m, ",overriderockperm");
 	if (sbi->s_showassoc)		seq_puts(m, ",showassoc");
-	if (sbi->s_utf8)		seq_puts(m, ",utf8");
 
 	if (sbi->s_check)		seq_printf(m, ",check=%c", sbi->s_check);
 	if (sbi->s_mapping)		seq_printf(m, ",map=%c", sbi->s_mapping);
@@ -518,9 +518,10 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
 		seq_printf(m, ",fmode=%o", sbi->s_fmode);
 
 #ifdef CONFIG_JOLIET
-	if (sbi->s_nls_iocharset &&
-	    strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
+	if (sbi->s_nls_iocharset)
 		seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+	else
+		seq_puts(m, ",iocharset=utf8");
 #endif
 	return 0;
 }
@@ -863,14 +864,13 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	sbi->s_nls_iocharset = NULL;
 
 #ifdef CONFIG_JOLIET
-	if (joliet_level && opt.utf8 == 0) {
+	if (joliet_level) {
 		char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
-		sbi->s_nls_iocharset = load_nls(p);
-		if (! sbi->s_nls_iocharset) {
-			/* Fail only if explicit charset specified */
-			if (opt.iocharset)
+		if (strcmp(p, "utf8") != 0) {
+			sbi->s_nls_iocharset = opt.iocharset ?
+				load_nls(opt.iocharset) : load_nls_default();
+			if (!sbi->s_nls_iocharset)
 				goto out_freesbi;
-			sbi->s_nls_iocharset = load_nls_default();
 		}
 	}
 #endif
@@ -886,7 +886,6 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	sbi->s_gid = opt.gid;
 	sbi->s_uid_set = opt.uid_set;
 	sbi->s_gid_set = opt.gid_set;
-	sbi->s_utf8 = opt.utf8;
 	sbi->s_nocompress = opt.nocompress;
 	sbi->s_overriderockperm = opt.overriderockperm;
 	/*
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 055ec6c586f7..dcdc191ed183 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -44,7 +44,6 @@ struct isofs_sb_info {
 	unsigned char s_session;
 	unsigned int s_high_sierra:1;
 	unsigned int s_rock:2;
-	unsigned int s_utf8:1;
 	unsigned int s_cruft:1; /* Broken disks with high byte of length
 				 * containing junk */
 	unsigned int s_nocompress:1;
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index be8b6a9d0b92..c0f04a1e7f69 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -41,14 +41,12 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
 int
 get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
 {
-	unsigned char utf8;
 	struct nls_table *nls;
 	unsigned char len = 0;
 
-	utf8 = ISOFS_SB(inode->i_sb)->s_utf8;
 	nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
 
-	if (utf8) {
+	if (!nls) {
 		len = utf16s_to_utf8s((const wchar_t *) de->name,
 				      de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
 				      outname, PAGE_SIZE);
From: Christoph Hellwig <hch@lst.de>
[ Upstream commit 224b0683228c5f332f9cee615d85e75e9a347170 ]
Except for the IDA, none of the allocations in bcache_device_init() is unwound on error; fix that.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Coly Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20210809064028.1198327-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/md/bcache/super.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 185246a0d855..d0f08e946453 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -931,20 +931,20 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 	n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
 	d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
 	if (!d->full_dirty_stripes)
-		return -ENOMEM;
+		goto out_free_stripe_sectors_dirty;
 
 	idx = ida_simple_get(&bcache_device_idx, 0,
 				BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
 	if (idx < 0)
-		return idx;
+		goto out_free_full_dirty_stripes;
 
 	if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
 			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
-		goto err;
+		goto out_ida_remove;
 
 	d->disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!d->disk)
-		goto err;
+		goto out_bioset_exit;
 
 	set_capacity(d->disk, sectors);
 	snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
@@ -987,8 +987,14 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
 	return 0;
 
-err:
+out_bioset_exit:
+	bioset_exit(&d->bio_split);
+out_ida_remove:
 	ida_simple_remove(&bcache_device_idx, idx);
+out_free_full_dirty_stripes:
+	kvfree(d->full_dirty_stripes);
+out_free_stripe_sectors_dirty:
+	kvfree(d->stripe_sectors_dirty);
 	return -ENOMEM;
 }
From: Baokun Li <libaokun1@huawei.com>
[ Upstream commit fad7cd3310db3099f95dd34312c77740fbc455e5 ]
If the user specifies a large enough value for the NBD blocks option, it may trigger a signed integer overflow, which may cause nbd->config->bytesize to become a large or small value, zero in particular.
  UBSAN: Undefined behaviour in drivers/block/nbd.c:325:31
  signed integer overflow:
  1024 * 4611686155866341414 cannot be represented in type 'long long int'
  [...]
  Call trace:
  [...]
   handle_overflow+0x188/0x1dc lib/ubsan.c:192
   __ubsan_handle_mul_overflow+0x34/0x44 lib/ubsan.c:213
   nbd_size_set drivers/block/nbd.c:325 [inline]
   __nbd_ioctl drivers/block/nbd.c:1342 [inline]
   nbd_ioctl+0x998/0xa10 drivers/block/nbd.c:1395
   __blkdev_driver_ioctl block/ioctl.c:311 [inline]
  [...]
Although it is not a big deal, still silence the UBSAN warning by limiting the input value.
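For reference, check_mul_overflow() from <linux/overflow.h> returns true when the product would not fit in the destination type, which is the guard the fix relies on. A minimal kernel-side sketch of the pattern, with illustrative names that are not the driver's own:

	#include <linux/overflow.h>

	/* Reject a block count whose byte size would overflow loff_t
	 * instead of letting the multiplication wrap. */
	static int example_set_size_blocks(loff_t blksize, unsigned long arg)
	{
		loff_t bytesize;

		if (check_mul_overflow((loff_t)arg, blksize, &bytesize))
			return -EINVAL;	/* product does not fit in loff_t */

		/* bytesize now holds arg * blksize, known not to have wrapped */
		return 0;
	}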
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/20210804021212.990223-1-libaokun1@huawei.com
[axboe: dropped unlikely()]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/block/nbd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 19f5d5a8b16a..acf3f85bf3c7 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1388,6 +1388,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
 {
 	struct nbd_config *config = nbd->config;
+	loff_t bytesize;
 
 	switch (cmd) {
 	case NBD_DISCONNECT:
@@ -1402,8 +1403,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	case NBD_SET_SIZE:
 		return nbd_set_size(nbd, arg, config->blksize);
 	case NBD_SET_SIZE_BLOCKS:
-		return nbd_set_size(nbd, arg * config->blksize,
-				    config->blksize);
+		if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+			return -EINVAL;
+		return nbd_set_size(nbd, bytesize, config->blksize);
 	case NBD_SET_TIMEOUT:
 		nbd_set_cmd_timeout(nbd, arg);
 		return 0;
From: Chunguang Xu <brookxu@tencent.com>
[ Upstream commit 4f1e9630afe6332de7286820fedd019f19eac057 ]
After commit 54efd50 ("block: make generic_make_request handle arbitrarily sized bios"), the IO through io-throttle may be larger, and these IOs may be further split into more small IOs. However, the IOPS throttle does not seem to be aware of this change, which makes the IOPS accounting of large IOs incomplete, resulting in disk-side IOPS that do not meet expectations. Maybe we should fix this problem.
We can reproduce it by setting max_sectors_kb of the disk to 128, setting blkio.write_iops_throttle to 100, running a dd instance inside blkio, and using iostat to watch IOPS:
dd if=/dev/zero of=/dev/sdb bs=1M count=1000 oflag=direct
As a result, without this change the average IOPS is 1995; with this change the IOPS is 98.
Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/65869aaad05475797d63b4c3fed4f529febe3c26.162787601...
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 block/blk-merge.c    |  2 ++
 block/blk-throttle.c | 32 ++++++++++++++++++++++++++++++++
 block/blk.h          |  2 ++
 3 files changed, 36 insertions(+)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index a11b3b53717e..22eeaad190d7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -348,6 +348,8 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
 		trace_block_split(split, (*bio)->bi_iter.bi_sector);
 		submit_bio_noacct(*bio);
 		*bio = split;
+
+		blk_throtl_charge_bio_split(*bio);
 	}
 }
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b1b22d863bdf..55c49015e533 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -178,6 +178,9 @@ struct throtl_grp {
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
 	unsigned long bio_cnt_reset_time;
 
+	atomic_t io_split_cnt[2];
+	atomic_t last_io_split_cnt[2];
+
 	struct blkg_rwstat stat_bytes;
 	struct blkg_rwstat stat_ios;
 };
@@ -777,6 +780,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
 	tg->bytes_disp[rw] = 0;
 	tg->io_disp[rw] = 0;
 
+	atomic_set(&tg->io_split_cnt[rw], 0);
+
 	/*
 	 * Previous slice has expired. We must have trimmed it after last
 	 * bio dispatch. That means since start of last slice, we never used
@@ -799,6 +804,9 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
 	tg->io_disp[rw] = 0;
 	tg->slice_start[rw] = jiffies;
 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+
+	atomic_set(&tg->io_split_cnt[rw], 0);
+
 	throtl_log(&tg->service_queue,
 		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -1031,6 +1039,9 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 				jiffies + tg->td->throtl_slice);
 	}
 
+	if (iops_limit != UINT_MAX)
+		tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
+
 	if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
 	    tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
 		if (wait)
@@ -2052,12 +2063,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
 	}
 
 	if (tg->iops[READ][LIMIT_LOW]) {
+		tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
 		iops = tg->last_io_disp[READ] * HZ / elapsed_time;
 		if (iops >= tg->iops[READ][LIMIT_LOW])
 			tg->last_low_overflow_time[READ] = now;
 	}
 
 	if (tg->iops[WRITE][LIMIT_LOW]) {
+		tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
 		iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
 		if (iops >= tg->iops[WRITE][LIMIT_LOW])
 			tg->last_low_overflow_time[WRITE] = now;
@@ -2176,6 +2189,25 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
+void blk_throtl_charge_bio_split(struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+	struct throtl_grp *parent = blkg_to_tg(blkg);
+	struct throtl_service_queue *parent_sq;
+	bool rw = bio_data_dir(bio);
+
+	do {
+		if (!parent->has_rules[rw])
+			break;
+
+		atomic_inc(&parent->io_split_cnt[rw]);
+		atomic_inc(&parent->last_io_split_cnt[rw]);
+
+		parent_sq = parent->service_queue.parent_sq;
+		parent = sq_to_tg(parent_sq);
+	} while (parent);
+}
+
 bool blk_throtl_bio(struct bio *bio)
 {
 	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
diff --git a/block/blk.h b/block/blk.h
index cb01429c162c..f10cc9b2c27f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -289,11 +289,13 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
 extern int blk_throtl_init(struct request_queue *q);
 extern void blk_throtl_exit(struct request_queue *q);
 extern void blk_throtl_register_queue(struct request_queue *q);
+extern void blk_throtl_charge_bio_split(struct bio *bio);
 bool blk_throtl_bio(struct bio *bio);
 #else /* CONFIG_BLK_DEV_THROTTLING */
 static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline void blk_throtl_exit(struct request_queue *q) { }
 static inline void blk_throtl_register_queue(struct request_queue *q) { }
+static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
 static inline bool blk_throtl_bio(struct bio *bio) { return false; }
 #endif /* CONFIG_BLK_DEV_THROTTLING */
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
From: Ruozhu Li <liruozhu@huawei.com>
[ Upstream commit 664227fde63844d69e9ec9e90a8a7801e6ff072d ]
We update ctrl->queue_count and schedule another reconnect when the io queue count is zero. But we will never try to create any io queue in the next reconnection, because ctrl->queue_count is already set to zero. We will end up having an admin-only session in Live state, which is exactly what we try to avoid in the original patch. Update ctrl->queue_count after the queue_count zero check to fix it.
Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8cb15ee5b249..18bd68b82d78 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1769,13 +1769,13 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 	if (ret)
 		return ret;
 
-	ctrl->queue_count = nr_io_queues + 1;
-	if (ctrl->queue_count < 2) {
+	if (nr_io_queues == 0) {
 		dev_err(ctrl->device,
 			"unable to set any I/O queues\n");
 		return -ENOMEM;
 	}
 
+	ctrl->queue_count = nr_io_queues + 1;
 	dev_info(ctrl->device,
 		"creating %d I/O queues.\n", nr_io_queues);
From: Ruozhu Li <liruozhu@huawei.com>
[ Upstream commit 85032874f80ba17bf187de1d14d9603bf3f582b8 ]
We update ctrl->queue_count and schedule another reconnect when the io queue count is zero. But we will never try to create any io queue in the next reconnection, because ctrl->queue_count is already set to zero. We will end up having an admin-only session in Live state, which is exactly what we try to avoid in the original patch. Update ctrl->queue_count after the queue_count zero check to fix it.
Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/rdma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 7f6b3a991501..3bd9cbc80246 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -735,13 +735,13 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 	if (ret)
 		return ret;
 
-	ctrl->ctrl.queue_count = nr_io_queues + 1;
-	if (ctrl->ctrl.queue_count < 2) {
+	if (nr_io_queues == 0) {
 		dev_err(ctrl->ctrl.device,
 			"unable to set any I/O queues\n");
 		return -ENOMEM;
 	}
 
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
From: Amit Engel <amit.engel@dell.com>
[ Upstream commit e804d5abe2d74cfe23f5f83be580d1cdc9307111 ]
According to the NVMe specification, the response dword 0 value of the Connect command is based on the status code: return the cntlid for successful completion, and return IPO and IATTR for connect invalid parameters. Fix a missing error information for a zero sized queue, and return the cntlid also for I/O queue Connect commands.
Signed-off-by: Amit Engel <amit.engel@dell.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/target/fabrics-cmd.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 7d0f3523fdab..8ef564c3b32c 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -120,6 +120,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
 	if (!sqsize) {
 		pr_warn("queue size zero!\n");
 		req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
 		ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 		goto err;
 	}
@@ -260,11 +261,11 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	}
 
 	status = nvmet_install_queue(ctrl, req);
-	if (status) {
-		/* pass back cntlid that had the issue of installing queue */
-		req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
+	if (status)
 		goto out_ctrl_put;
-	}
+
+	/* pass back cntlid for successful completion */
+	req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
 
 	pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
From: Dmitry Osipenko <digetx@gmail.com>
[ Upstream commit efe2175478d5237949e33c84d9a722fc084b218c ]
Pin control needs to be activated by setting the enable bit, otherwise the hardware rejects all pin changes. Previously this stayed unnoticed on Nexus 7 because pin control was enabled by default after rebooting from the downstream kernel, which uses a driver that sets the enable bit, and the charger registers are non-volatile until the power supply (battery) is disconnected.

Configure the pin control enable bit. This fixes the potentially never-enabled charging on devices that use pin control.
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/power/supply/smb347-charger.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/drivers/power/supply/smb347-charger.c b/drivers/power/supply/smb347-charger.c
index df240420f2de..9d8c2fadd4d0 100644
--- a/drivers/power/supply/smb347-charger.c
+++ b/drivers/power/supply/smb347-charger.c
@@ -55,6 +55,7 @@
 #define CFG_PIN_EN_CTRL_ACTIVE_LOW		0x60
 #define CFG_PIN_EN_APSD_IRQ			BIT(1)
 #define CFG_PIN_EN_CHARGER_ERROR		BIT(2)
+#define CFG_PIN_EN_CTRL				BIT(4)
 #define CFG_THERM				0x07
 #define CFG_THERM_SOFT_HOT_COMPENSATION_MASK	0x03
 #define CFG_THERM_SOFT_HOT_COMPENSATION_SHIFT	0
@@ -724,6 +725,15 @@ static int smb347_hw_init(struct smb347_charger *smb)
 	if (ret < 0)
 		goto fail;
 
+	/* Activate pin control, making it writable. */
+	switch (smb->enable_control) {
+	case SMB3XX_CHG_ENABLE_PIN_ACTIVE_LOW:
+	case SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH:
+		ret = regmap_set_bits(smb->regmap, CFG_PIN, CFG_PIN_EN_CTRL);
+		if (ret < 0)
+			goto fail;
+	}
+
 	/*
 	 * Make the charging functionality controllable by a write to the
 	 * command register unless pin control is specified in the platform
From: Sebastian Krzyszkowiak sebastian.krzyszkowiak@puri.sm
[ Upstream commit ed0d0a0506025f06061325cedae1bbebd081620a ]
Signed-off-by: Sebastian Krzyszkowiak sebastian.krzyszkowiak@puri.sm Signed-off-by: Sebastian Reichel sebastian.reichel@collabora.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/power/supply/max17042_battery.c | 2 +- include/linux/power/max17042_battery.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c index ce2041b30a06..215e77d3b6d9 100644 --- a/drivers/power/supply/max17042_battery.c +++ b/drivers/power/supply/max17042_battery.c @@ -748,7 +748,7 @@ static inline void max17042_override_por_values(struct max17042_chip *chip) struct max17042_config_data *config = chip->pdata->config_data;
max17042_override_por(map, MAX17042_TGAIN, config->tgain); - max17042_override_por(map, MAx17042_TOFF, config->toff); + max17042_override_por(map, MAX17042_TOFF, config->toff); max17042_override_por(map, MAX17042_CGAIN, config->cgain); max17042_override_por(map, MAX17042_COFF, config->coff);
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index d55c746ac56e..e00ad1cfb1f1 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -69,7 +69,7 @@ enum max17042_register { MAX17042_RelaxCFG = 0x2A, MAX17042_MiscCFG = 0x2B, MAX17042_TGAIN = 0x2C, - MAx17042_TOFF = 0x2D, + MAX17042_TOFF = 0x2D, MAX17042_CGAIN = 0x2E, MAX17042_COFF = 0x2F,
From: Vineeth Vijayan vneethv@linux.ibm.com
[ Upstream commit d3683c055212bf910d4e318f7944910ce10dbee6 ]
Introduce dev_busid, which exports the device-id associated with the io-subchannel (and message-subchannel). The dev_busid is that of the device which may be physically installed on the corresponding subchannel. The dev_busid value "none" indicates that the subchannel is not valid or that there is no I/O device currently associated with the subchannel.
The dev_busid information would be helpful to write device-specific udev-rules associated with the subchannel. The dev_busid interface would be available even when the sch is not bound to any driver or if there is no operational device connected on it. Hence this attribute can be used to write udev-rules which are specific to the device associated with the subchannel.
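As an illustration of consuming the attribute, a small userspace sketch; the subchannel path below is a made-up example and not part of the patch:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char busid[32] = "";
        FILE *f = fopen("/sys/devices/css0/0.0.0008/dev_busid", "r");

        if (!f)
                return 1;
        if (fgets(busid, sizeof(busid), f))
                busid[strcspn(busid, "\n")] = '\0';
        fclose(f);

        if (strcmp(busid, "none") == 0)
                printf("no device associated with this subchannel\n");
        else
                printf("device bus ID: %s\n", busid);
        return 0;
}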
Signed-off-by: Vineeth Vijayan vneethv@linux.ibm.com Reviewed-by: Peter Oberparleiter oberpar@linux.ibm.com Signed-off-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/cio/css.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index a974943c27da..9fcdb8d81eee 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -430,9 +430,26 @@ static ssize_t pimpampom_show(struct device *dev, } static DEVICE_ATTR_RO(pimpampom);
+static ssize_t dev_busid_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct subchannel *sch = to_subchannel(dev); + struct pmcw *pmcw = &sch->schib.pmcw; + + if ((pmcw->st == SUBCHANNEL_TYPE_IO || + pmcw->st == SUBCHANNEL_TYPE_MSG) && pmcw->dnv) + return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid, + pmcw->dev); + else + return sysfs_emit(buf, "none\n"); +} +static DEVICE_ATTR_RO(dev_busid); + static struct attribute *io_subchannel_type_attrs[] = { &dev_attr_chpids.attr, &dev_attr_pimpampom.attr, + &dev_attr_dev_busid.attr, NULL, }; ATTRIBUTE_GROUPS(io_subchannel_type);
From: Harald Freudenberger freude@linux.ibm.com
[ Upstream commit 8617bb74006252cb2286008afe7d6575a6425857 ]
Tests showed a mismatch between what the CCA tool reports about the APKA master key state and what's displayed by the zcrypt dd in sysfs. After some investigation, we found out that the documentation which was the source for the zcrypt dd implementation lacks the listing of 3 fields. So this patch now moves the evaluation of the APKA master key state to the correct offset.
Signed-off-by: Harald Freudenberger freude@linux.ibm.com Signed-off-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/crypto/zcrypt_ccamisc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index bc34bedf9db8..6a3c2b460965 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -1724,10 +1724,10 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci) rlen = vlen = PAGE_SIZE/2; rc = cca_query_crypto_facility(cardnr, domain, "STATICSB", rarray, &rlen, varray, &vlen); - if (rc == 0 && rlen >= 10*8 && vlen >= 240) { - ci->new_apka_mk_state = (char) rarray[7*8]; - ci->cur_apka_mk_state = (char) rarray[8*8]; - ci->old_apka_mk_state = (char) rarray[9*8]; + if (rc == 0 && rlen >= 13*8 && vlen >= 240) { + ci->new_apka_mk_state = (char) rarray[10*8]; + ci->cur_apka_mk_state = (char) rarray[11*8]; + ci->old_apka_mk_state = (char) rarray[12*8]; if (ci->old_apka_mk_state == '2') memcpy(&ci->old_apka_mkvp, varray + 208, 8); if (ci->cur_apka_mk_state == '2')
From: Damien Le Moal damien.lemoal@wdc.com
[ Upstream commit 355a8031dc174450ccad2a61c513ad7222d87a97 ]
The loop on entry to ata_host_start() may not initialize host->ops to a non-NULL value. The test of the host_stop field of host->ops must therefore be preceded by a check that host->ops is not NULL.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Damien Le Moal damien.lemoal@wdc.com Reviewed-by: Hannes Reinecke hare@suse.de Link: https://lore.kernel.org/r/20210816014456.2191776-3-damien.lemoal@wdc.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 61c762961ca8..44f434acfce0 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5573,7 +5573,7 @@ int ata_host_start(struct ata_host *host) have_stop = 1; }
- if (host->ops->host_stop) + if (host->ops && host->ops->host_stop) have_stop = 1;
if (have_stop) {
From: Valentin Schneider valentin.schneider@arm.com
[ Upstream commit 0083242c93759dde353a963a90cb351c5c283379 ]
The scheduler currently expects NUMA node distances to be stable from init onwards, and as a consequence builds the related data structures once-and-for-all at init (see sched_init_numa()).
Unfortunately, on some architectures node distance is unreliable for offline nodes and may very well change upon onlining.
Skip over offline nodes during sched_init_numa(). Track nodes that have been onlined at least once, and trigger a build of a node's NUMA masks when it is first onlined post-init.
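The once-per-node tracking follows a common bitmap pattern; a minimal sketch with illustrative names (the patch itself uses sched_numa_onlined_nodes):

static unsigned long *seen_nodes;       /* allocated at init */

static int seen_nodes_init(void)
{
        seen_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL);
        if (!seen_nodes)
                return -ENOMEM;
        bitmap_zero(seen_nodes, nr_node_ids);
        return 0;
}

static bool node_seen_first_time(unsigned int node)
{
        if (test_bit(node, seen_nodes))
                return false;           /* masks already built */
        bitmap_set(seen_nodes, node, 1);
        return true;
}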
Reported-by: Geetika Moolchandani Geetika.Moolchandani1@ibm.com Signed-off-by: Srikar Dronamraju srikar@linux.vnet.ibm.com Signed-off-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20210818074333.48645-1-srikar@linux.vnet.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/sched/topology.c | 65 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index b77ad49dc14f..4e8698e62f07 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1482,6 +1482,8 @@ int sched_max_numa_distance; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; + +static unsigned long __read_mostly *sched_numa_onlined_nodes; #endif
/* @@ -1833,6 +1835,16 @@ void sched_init_numa(void) sched_domains_numa_masks[i][j] = mask;
for_each_node(k) { + /* + * Distance information can be unreliable for + * offline nodes, defer building the node + * masks to its bringup. + * This relies on all unique distance values + * still being visible at init time. + */ + if (!node_online(j)) + continue; + if (sched_debug() && (node_distance(j, k) != node_distance(k, j))) sched_numa_warn("Node-distance not symmetric");
@@ -1886,6 +1898,53 @@ void sched_init_numa(void) sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
init_numa_topology_type(); + + sched_numa_onlined_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL); + if (!sched_numa_onlined_nodes) + return; + + bitmap_zero(sched_numa_onlined_nodes, nr_node_ids); + for_each_online_node(i) + bitmap_set(sched_numa_onlined_nodes, i, 1); +} + +static void __sched_domains_numa_masks_set(unsigned int node) +{ + int i, j; + + /* + * NUMA masks are not built for offline nodes in sched_init_numa(). + * Thus, when a CPU of a never-onlined-before node gets plugged in, + * adding that new CPU to the right NUMA masks is not sufficient: the + * masks of that CPU's node must also be updated. + */ + if (test_bit(node, sched_numa_onlined_nodes)) + return; + + bitmap_set(sched_numa_onlined_nodes, node, 1); + + for (i = 0; i < sched_domains_numa_levels; i++) { + for (j = 0; j < nr_node_ids; j++) { + if (!node_online(j) || node == j) + continue; + + if (node_distance(j, node) > sched_domains_numa_distance[i]) + continue; + + /* Add remote nodes in our masks */ + cpumask_or(sched_domains_numa_masks[i][node], + sched_domains_numa_masks[i][node], + sched_domains_numa_masks[0][j]); + } + } + + /* + * A new node has been brought up, potentially changing the topology + * classification. + * + * Note that this is racy vs any use of sched_numa_topology_type :/ + */ + init_numa_topology_type(); }
void sched_domains_numa_masks_set(unsigned int cpu) @@ -1893,8 +1952,14 @@ void sched_domains_numa_masks_set(unsigned int cpu) int node = cpu_to_node(cpu); int i, j;
+ __sched_domains_numa_masks_set(node); + for (i = 0; i < sched_domains_numa_levels; i++) { for (j = 0; j < nr_node_ids; j++) { + if (!node_online(j)) + continue; + + /* Set ourselves in the remote node's masks */ if (node_distance(j, node) <= sched_domains_numa_distance[i]) cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); }
From: Ben Hutchings ben.hutchings@mind.be
[ Upstream commit fe4d55773b879c785ae61da9b1c2160f0110f67e ]
lockdep complains that in omap-aes, the list_lock is taken both with softirqs enabled at probe time, and also in softirq context, which could lead to a deadlock:
================================ WARNING: inconsistent lock state 5.14.0-rc1-00035-gc836005b01c5-dirty #69 Not tainted -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. ksoftirqd/0/7 [HC0[0]:SC1[3]:HE1:SE0] takes: bf00e014 (list_lock){+.?.}-{2:2}, at: omap_aes_find_dev+0x18/0x54 [omap_aes_driver] {SOFTIRQ-ON-W} state was registered at: _raw_spin_lock+0x40/0x50 omap_aes_probe+0x1d4/0x664 [omap_aes_driver] platform_probe+0x58/0xb8 really_probe+0xbc/0x314 __driver_probe_device+0x80/0xe4 driver_probe_device+0x30/0xc8 __driver_attach+0x70/0xf4 bus_for_each_dev+0x70/0xb4 bus_add_driver+0xf0/0x1d4 driver_register+0x74/0x108 do_one_initcall+0x84/0x2e4 do_init_module+0x5c/0x240 load_module+0x221c/0x2584 sys_finit_module+0xb0/0xec ret_fast_syscall+0x0/0x2c 0xbed90b30 irq event stamp: 111800 hardirqs last enabled at (111800): [<c02a21e4>] __kmalloc+0x484/0x5ec hardirqs last disabled at (111799): [<c02a21f0>] __kmalloc+0x490/0x5ec softirqs last enabled at (111776): [<c01015f0>] __do_softirq+0x2b8/0x4d0 softirqs last disabled at (111781): [<c0135948>] run_ksoftirqd+0x34/0x50
other info that might help us debug this: Possible unsafe locking scenario:
CPU0 ---- lock(list_lock); <Interrupt> lock(list_lock);
*** DEADLOCK ***
2 locks held by ksoftirqd/0/7: #0: c0f5e8c8 (rcu_read_lock){....}-{1:2}, at: netif_receive_skb+0x6c/0x260 #1: c0f5e8c8 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x2c/0xdc
stack backtrace: CPU: 0 PID: 7 Comm: ksoftirqd/0 Not tainted 5.14.0-rc1-00035-gc836005b01c5-dirty #69 Hardware name: Generic AM43 (Flattened Device Tree) [<c010e6e0>] (unwind_backtrace) from [<c010b9d0>] (show_stack+0x10/0x14) [<c010b9d0>] (show_stack) from [<c017c640>] (mark_lock.part.17+0x5bc/0xd04) [<c017c640>] (mark_lock.part.17) from [<c017d9e4>] (__lock_acquire+0x960/0x2fa4) [<c017d9e4>] (__lock_acquire) from [<c0180980>] (lock_acquire+0x10c/0x358) [<c0180980>] (lock_acquire) from [<c093d324>] (_raw_spin_lock_bh+0x44/0x58) [<c093d324>] (_raw_spin_lock_bh) from [<bf00b258>] (omap_aes_find_dev+0x18/0x54 [omap_aes_driver]) [<bf00b258>] (omap_aes_find_dev [omap_aes_driver]) from [<bf00b328>] (omap_aes_crypt+0x94/0xd4 [omap_aes_driver]) [<bf00b328>] (omap_aes_crypt [omap_aes_driver]) from [<c08ac6d0>] (esp_input+0x1b0/0x2c8) [<c08ac6d0>] (esp_input) from [<c08c9e90>] (xfrm_input+0x410/0x1290) [<c08c9e90>] (xfrm_input) from [<c08b6374>] (xfrm4_esp_rcv+0x54/0x11c) [<c08b6374>] (xfrm4_esp_rcv) from [<c0838840>] (ip_protocol_deliver_rcu+0x48/0x3bc) [<c0838840>] (ip_protocol_deliver_rcu) from [<c0838c50>] (ip_local_deliver_finish+0x9c/0xdc) [<c0838c50>] (ip_local_deliver_finish) from [<c0838dd8>] (ip_local_deliver+0x148/0x1b0) [<c0838dd8>] (ip_local_deliver) from [<c0838f5c>] (ip_rcv+0x11c/0x180) [<c0838f5c>] (ip_rcv) from [<c077e3a4>] (__netif_receive_skb_one_core+0x54/0x74) [<c077e3a4>] (__netif_receive_skb_one_core) from [<c077e588>] (netif_receive_skb+0xa8/0x260) [<c077e588>] (netif_receive_skb) from [<c068d6d4>] (cpsw_rx_handler+0x224/0x2fc) [<c068d6d4>] (cpsw_rx_handler) from [<c0688ccc>] (__cpdma_chan_process+0xf4/0x188) [<c0688ccc>] (__cpdma_chan_process) from [<c068a0c0>] (cpdma_chan_process+0x3c/0x5c) [<c068a0c0>] (cpdma_chan_process) from [<c0690e14>] (cpsw_rx_mq_poll+0x44/0x98) [<c0690e14>] (cpsw_rx_mq_poll) from [<c0780810>] (__napi_poll+0x28/0x268) [<c0780810>] (__napi_poll) from [<c0780c64>] (net_rx_action+0xcc/0x204) [<c0780c64>] (net_rx_action) from [<c0101478>] (__do_softirq+0x140/0x4d0) [<c0101478>] (__do_softirq) from [<c0135948>] (run_ksoftirqd+0x34/0x50) [<c0135948>] (run_ksoftirqd) from [<c01583b8>] (smpboot_thread_fn+0xf4/0x1d8) [<c01583b8>] (smpboot_thread_fn) from [<c01546dc>] (kthread+0x14c/0x174) [<c01546dc>] (kthread) from [<c010013c>] (ret_from_fork+0x14/0x38) ...
The omap-des and omap-sham drivers appear to have a similar issue.
Fix this by using spin_{,un}lock_bh() around device list access in all the probe and remove functions.
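The rule being applied: a lock that is also taken from softirq context must disable bottom halves whenever it is taken in process context. A minimal sketch of the probe-side usage, with illustrative names:

static LIST_HEAD(dev_list);
static DEFINE_SPINLOCK(list_lock);

static void example_register_dev(struct list_head *entry)
{
        spin_lock_bh(&list_lock);       /* blocks softirqs on this CPU */
        list_add_tail(entry, &dev_list);
        spin_unlock_bh(&list_lock);
}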
Signed-off-by: Ben Hutchings ben.hutchings@mind.be Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/omap-aes.c | 8 ++++---- drivers/crypto/omap-des.c | 8 ++++---- drivers/crypto/omap-sham.c | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 0dd4c6b157de..9b968ac4ee7b 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1175,9 +1175,9 @@ static int omap_aes_probe(struct platform_device *pdev) spin_lock_init(&dd->lock);
INIT_LIST_HEAD(&dd->list); - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_add_tail(&dd->list, &dev_list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock);
/* Initialize crypto engine */ dd->engine = crypto_engine_alloc_init(dev, 1); @@ -1264,9 +1264,9 @@ static int omap_aes_remove(struct platform_device *pdev) if (!dd) return -ENODEV;
- spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_del(&dd->list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index bc8631363d72..be77656864e3 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -1033,9 +1033,9 @@ static int omap_des_probe(struct platform_device *pdev)
INIT_LIST_HEAD(&dd->list); - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_add_tail(&dd->list, &dev_list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock);
/* Initialize des crypto engine */ dd->engine = crypto_engine_alloc_init(dev, 1); @@ -1094,9 +1094,9 @@ static int omap_des_remove(struct platform_device *pdev) if (!dd) return -ENODEV;
- spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_del(&dd->list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a47ac60a4ee1..63beea7cdba5 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2144,9 +2144,9 @@ static int omap_sham_probe(struct platform_device *pdev) (rev & dd->pdata->major_mask) >> dd->pdata->major_shift, (rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
- spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_add_tail(&dd->list, &sham.dev_list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock);
dd->engine = crypto_engine_alloc_init(dev, 1); if (!dd->engine) { @@ -2194,9 +2194,9 @@ static int omap_sham_probe(struct platform_device *pdev) err_engine_start: crypto_engine_exit(dd->engine); err_engine: - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_del(&dd->list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); err_pm: pm_runtime_disable(dev); if (!dd->polling_mode) @@ -2215,9 +2215,9 @@ static int omap_sham_remove(struct platform_device *pdev) dd = platform_get_drvdata(pdev); if (!dd) return -ENODEV; - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_del(&dd->list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { crypto_unregister_ahash(
From: Giovanni Cabiddu giovanni.cabiddu@intel.com
[ Upstream commit 5147f0906d50a9d26f2b8698cd06b5680e9867ff ]
The function adf_dev_init() ignores the error code reported by enable_vf2pf_comms(). If the latter fails, e.g. because the VF is not compatible with the PF, the VF driver load still proceeds. This patch changes adf_dev_init() so that the error code from enable_vf2pf_comms() is returned to the caller.
Signed-off-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Reviewed-by: Marco Chiappero marco.chiappero@intel.com Reviewed-by: Fiona Trahe fiona.trahe@intel.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qat/qat_common/adf_init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 744c40351428..02864985dbb0 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -61,6 +61,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) struct service_hndl *service; struct list_head *list_itr; struct adf_hw_device_data *hw_data = accel_dev->hw_device; + int ret;
if (!hw_data) { dev_err(&GET_DEV(accel_dev), @@ -127,9 +128,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) }
hw_data->enable_error_correction(accel_dev); - hw_data->enable_vf2pf_comms(accel_dev); + ret = hw_data->enable_vf2pf_comms(accel_dev);
- return 0; + return ret; } EXPORT_SYMBOL_GPL(adf_dev_init);
From: Giovanni Cabiddu giovanni.cabiddu@intel.com
[ Upstream commit 0a73c762e1eee33a5e5dc0e3488f1b7cd17249b3 ]
The top half of the VF drivers handled only one interrupt source at a time. If an interrupt for PF2VF and a bundle occurred at the same time, the ISR scheduled only the bottom half for PF2VF. This patch fixes the VF top half so that if both interrupt sources trigger at the same time, both bottom halves are scheduled.
This patch is based on earlier work done by Conor McLoughlin.
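The resulting handler shape is a common pattern: test each source, schedule the matching bottom half, and defer the return value. A generic sketch; source_a_pending() and friends are hypothetical helpers, not QAT APIs:

struct example_dev {
        struct tasklet_struct a_tasklet;
        struct tasklet_struct b_tasklet;
};

bool source_a_pending(struct example_dev *dev);  /* hypothetical */
bool source_b_pending(struct example_dev *dev);  /* hypothetical */

static irqreturn_t example_isr(int irq, void *privdata)
{
        struct example_dev *dev = privdata;
        bool handled = false;

        if (source_a_pending(dev)) {
                tasklet_hi_schedule(&dev->a_tasklet);
                handled = true;         /* keep checking the other source */
        }

        if (source_b_pending(dev)) {
                tasklet_hi_schedule(&dev->b_tasklet);
                handled = true;
        }

        return handled ? IRQ_HANDLED : IRQ_NONE;
}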
Signed-off-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Reviewed-by: Marco Chiappero marco.chiappero@intel.com Reviewed-by: Fiona Trahe fiona.trahe@intel.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qat/qat_common/adf_vf_isr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 888388acb6bd..3e4f64d248f9 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -160,6 +160,7 @@ static irqreturn_t adf_isr(int irq, void *privdata) struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *pmisc_bar_addr = pmisc->virt_addr; + bool handled = false; u32 v_int;
/* Read VF INT source CSR to determine the source of VF interrupt */ @@ -172,7 +173,7 @@ static irqreturn_t adf_isr(int irq, void *privdata)
/* Schedule tasklet to handle interrupt BH */ tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet); - return IRQ_HANDLED; + handled = true; }
/* Check bundle interrupt */ @@ -184,10 +185,10 @@ static irqreturn_t adf_isr(int irq, void *privdata) csr_ops->write_csr_int_flag_and_col(bank->csr_addr, bank->bank_number, 0); tasklet_hi_schedule(&bank->resp_handler); - return IRQ_HANDLED; + handled = true; }
- return IRQ_NONE; + return handled ? IRQ_HANDLED : IRQ_NONE; }
static int adf_request_msi_irq(struct adf_accel_dev *accel_dev)
From: Marco Chiappero marco.chiappero@intel.com
[ Upstream commit 3d655732b0199562267a05c7ff69ecdd11632939 ]
Use reinit_completion() to set to a clean state a completion variable, used to coordinate the VF to PF request-response flow, before every new VF request.
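A minimal sketch of the request-response pattern; example_put_msg() is a hypothetical transport function. A completion reused across exchanges must be re-armed before each send, otherwise a stale complete() from an earlier exchange satisfies the next wait immediately:

struct example_dev {
        struct completion msg_completion;
};

int example_put_msg(struct example_dev *dev, u32 msg);  /* hypothetical */

static int example_send_request(struct example_dev *dev, u32 msg)
{
        reinit_completion(&dev->msg_completion);        /* re-arm first */

        if (example_put_msg(dev, msg))
                return -EIO;

        if (!wait_for_completion_timeout(&dev->msg_completion,
                                         msecs_to_jiffies(1000)))
                return -ETIMEDOUT;

        return 0;
}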
Signed-off-by: Marco Chiappero marco.chiappero@intel.com Co-developed-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Signed-off-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Reviewed-by: Fiona Trahe fiona.trahe@intel.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index a1b77bd7a894..663638bb5c97 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -316,6 +316,8 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT; BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255);
+ reinit_completion(&accel_dev->vf.iov_msg_completion); + /* Send request from VF to PF */ ret = adf_iov_putmsg(accel_dev, msg, 0); if (ret) {
From: Marco Chiappero marco.chiappero@intel.com
[ Upstream commit b90c1c4d3fa8cd90f4e8245b13564380fd0bfad1 ]
At start and shutdown, VFs notify the PF about their state. These notifications are carried out through a message exchange using the PFVF protocol.
The function names suggest that they perform the init or shutdown logic themselves. Fix the naming to better reflect their purpose.
Signed-off-by: Marco Chiappero marco.chiappero@intel.com Co-developed-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Signed-off-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Reviewed-by: Fiona Trahe fiona.trahe@intel.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 4 ++-- drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c | 4 ++-- drivers/crypto/qat/qat_common/adf_common_drv.h | 8 ++++---- drivers/crypto/qat/qat_common/adf_vf2pf_msg.c | 12 ++++++------ .../qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 15f6b9bdfb22..ddf42fb32625 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -81,10 +81,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index d231583428c9..7e202ef92523 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -81,10 +81,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index c61476553728..dd4a811b7e89 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -198,8 +198,8 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
-int adf_vf2pf_init(struct adf_accel_dev *accel_dev); -void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev); +int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev); +void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev); int adf_init_pf_wq(void); void adf_exit_pf_wq(void); int adf_init_vf_wq(void); @@ -222,12 +222,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) { }
-static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev) { return 0; }
-static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev) { }
diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c index e85bd62d134a..3e25fac051b2 100644 --- a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c @@ -5,14 +5,14 @@ #include "adf_pf2vf_msg.h"
/** - * adf_vf2pf_init() - send init msg to PF + * adf_vf2pf_notify_init() - send init msg to PF * @accel_dev: Pointer to acceleration VF device. * * Function sends an init message from the VF to a PF * * Return: 0 on success, error code otherwise. */ -int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev) { u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | (ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT)); @@ -25,17 +25,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev) set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); return 0; } -EXPORT_SYMBOL_GPL(adf_vf2pf_init); +EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init);
/** - * adf_vf2pf_shutdown() - send shutdown msg to PF + * adf_vf2pf_notify_shutdown() - send shutdown msg to PF * @accel_dev: Pointer to acceleration VF device. * * Function sends a shutdown message from the VF to a PF * * Return: void */ -void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev) { u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT)); @@ -45,4 +45,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) dev_err(&GET_DEV(accel_dev), "Failed to send Shutdown event to PF\n"); } -EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown); +EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index f14fb82ed6df..744734caaf7b 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -81,10 +81,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels;
From: Giovanni Cabiddu giovanni.cabiddu@intel.com
[ Upstream commit 645ae0af1840199086c33e4f841892ebee73f615 ]
The function adf_iov_putmsg() is only used inside the intel_qat module and therefore should not be exported. Remove EXPORT_SYMBOL for the function adf_iov_putmsg().
Signed-off-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Reviewed-by: Fiona Trahe fiona.trahe@intel.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index 663638bb5c97..efa4bffb4f60 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -186,7 +186,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
return ret; } -EXPORT_SYMBOL_GPL(adf_iov_putmsg);
void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) {
From: Kai Ye yekai13@huawei.com
[ Upstream commit 90367a027a22c3a9ca8b8bac15df34d9e859fc11 ]
The algs registration process has gained a check on the queue pair count, so the same check needs to be added on the abnormal exit path.
Signed-off-by: Kai Ye yekai13@huawei.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/hisilicon/sec2/sec_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 490db7bccf61..8addbd7a3339 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -984,7 +984,8 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0;
err_alg_unregister: - hisi_qm_alg_unregister(qm, &sec_devices); + if (qm->qp_num >= ctx_q_num) + hisi_qm_alg_unregister(qm, &sec_devices); err_qm_stop: sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL);
From: Kai Ye yekai13@huawei.com
[ Upstream commit a52626106d6f7edf3d106c065e13a0313cfeb82f ]
When the endian configuration of the hardware is abnormal, the SEC engine becomes faulty and reports empty messages, which affects the normal functioning of the hardware. Currently the soft configuration method can't restore a faulty device. The endianness needs to be configured according to the system properties, so fix it.
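The new approach derives the setting from build-time configuration instead of hardware state; a sketch of the helper logic, with the bit positions as used in the patch:

static u32 example_endian_bits(void)
{
        u32 bits = 0;

        if (!IS_ENABLED(CONFIG_64BIT))
                bits |= BIT(1);
        if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
                bits |= BIT(0);

        return bits;
}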
Signed-off-by: Kai Ye yekai13@huawei.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/hisilicon/sec2/sec.h | 5 ---- drivers/crypto/hisilicon/sec2/sec_main.c | 31 +++++++----------------- 2 files changed, 9 insertions(+), 27 deletions(-)
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 018415b9840a..d97cf02b1df7 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -157,11 +157,6 @@ struct sec_ctx { struct device *dev; };
-enum sec_endian { - SEC_LE = 0, - SEC_32BE, - SEC_64BE -};
enum sec_debug_file_index { SEC_CLEAR_ENABLE, diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 8addbd7a3339..a0cc46b649a3 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -312,31 +312,20 @@ static const struct pci_device_id sec_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, sec_dev_ids);
-static u8 sec_get_endian(struct hisi_qm *qm) +static void sec_set_endian(struct hisi_qm *qm) { u32 reg;
- /* - * As for VF, it is a wrong way to get endian setting by - * reading a register of the engine - */ - if (qm->pdev->is_virtfn) { - dev_err_ratelimited(&qm->pdev->dev, - "cannot access a register in VF!\n"); - return SEC_LE; - } reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - /* BD little endian mode */ - if (!(reg & BIT(0))) - return SEC_LE; + reg &= ~(BIT(1) | BIT(0)); + if (!IS_ENABLED(CONFIG_64BIT)) + reg |= BIT(1);
- /* BD 32-bits big endian mode */ - else if (!(reg & BIT(1))) - return SEC_32BE;
- /* BD 64-bits big endian mode */ - else - return SEC_64BE; + if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + reg |= BIT(0); + + writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); }
static void sec_open_sva_prefetch(struct hisi_qm *qm) @@ -429,9 +418,7 @@ static int sec_engine_init(struct hisi_qm *qm) qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
/* config endian */ - reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - reg |= sec_get_endian(qm); - writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); + sec_set_endian(qm);
return 0; }
From: Tianjia Zhang tianjia.zhang@linux.alibaba.com
[ Upstream commit 7b3d52683b3a47c0ba1dfd6b5994a3a795b06972 ]
There are several places where the return value checks of crypto_aead_setkey() and crypto_aead_setauthsize() were lost. It is necessary to add these checks.
At the same time, move the crypto_aead_setauthsize() call out of the loop, since it only needs to be called once after loading the transform.
Fixes: 53f52d7aecb4 ("crypto: tcrypt - Added speed tests for AEAD crypto alogrithms in tcrypt test suite") Signed-off-by: Tianjia Zhang tianjia.zhang@linux.alibaba.com Reviewed-by: Vitaly Chikunov vt@altlinux.org Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- crypto/tcrypt.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f8d06da78e4f..6863e57b088d 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, }
ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo, + ret); + goto out_free_tfm; + }
for (i = 0; i < num_mb; ++i) if (testmgr_alloc_buf(data[i].xbuf)) { @@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, for (i = 0; i < num_mb; ++i) { data[i].req = aead_request_alloc(tfm, GFP_KERNEL); if (!data[i].req) { - pr_err("alg: skcipher: Failed to allocate request for %s\n", + pr_err("alg: aead: Failed to allocate request for %s\n", algo); while (i--) aead_request_free(data[i].req); @@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, sgout = &sg[9];
tfm = crypto_alloc_aead(algo, 0, 0); - if (IS_ERR(tfm)) { pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo, PTR_ERR(tfm)); goto out_notfm; }
+ ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo, + ret); + goto out_noreq; + } + crypto_init_wait(&wait); printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo, get_driver_name(crypto_aead, tfm), e); @@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, break; } } + ret = crypto_aead_setkey(tfm, key, *keysize); - ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("setkey() failed flags=%x: %d\n", + crypto_aead_get_flags(tfm), ret); + goto out; + }
iv_len = crypto_aead_ivsize(tfm); if (iv_len) @@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", i, *keysize * 8, bs);
- memset(tvmem[0], 0xff, PAGE_SIZE);
- if (ret) { - pr_err("setkey() failed flags=%x\n", - crypto_aead_get_flags(tfm)); - goto out; - } - sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize), assoc, aad_size);
From: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com
[ Upstream commit f671a691e299f58835d4660d642582bf0e8f6fda ]
Syzbot reports a potential deadlock in do_fcntl:
======================================================== WARNING: possible irq lock inversion dependency detected 5.12.0-syzkaller #0 Not tainted -------------------------------------------------------- syz-executor132/8391 just changed the state of lock: ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: f_getown_ex fs/fcntl.c:211 [inline] ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: do_fcntl+0x8b4/0x1200 fs/fcntl.c:395 but this lock was taken by another, HARDIRQ-safe lock in the past: (&dev->event_lock){-...}-{2:2}
and interrupts could create inverse lock ordering between them.
other info that might help us debug this: Chain exists of: &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock
Possible interrupt unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(&f->f_owner.lock); local_irq_disable(); lock(&dev->event_lock); lock(&new->fa_lock); <Interrupt> lock(&dev->event_lock);
*** DEADLOCK ***
This happens because there is a lock hierarchy of &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock from the following call chain:
input_inject_event(): spin_lock_irqsave(&dev->event_lock,...); input_handle_event(): input_pass_values(): input_to_handler(): evdev_events(): evdev_pass_values(): spin_lock(&client->buffer_lock); __pass_event(): kill_fasync(): kill_fasync_rcu(): read_lock(&fa->fa_lock); send_sigio(): read_lock_irqsave(&fown->lock,...);
However, since &dev->event_lock is HARDIRQ-safe, interrupts have to be disabled while grabbing &f->f_owner.lock, otherwise we invert the lock hierarchy.
Hence, we replace calls to read_lock/read_unlock on &f->f_owner.lock, with read_lock_irq/read_unlock_irq.
Reported-and-tested-by: syzbot+e6d5398a02c516ce5e70@syzkaller.appspotmail.com Signed-off-by: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com Signed-off-by: Jeff Layton jlayton@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/fcntl.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/fs/fcntl.c b/fs/fcntl.c index f946bec8f1f1..932ec1e9f5bf 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -150,7 +150,8 @@ void f_delown(struct file *filp) pid_t f_getown(struct file *filp) { pid_t pid = 0; - read_lock(&filp->f_owner.lock); + + read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { pid = pid_vnr(filp->f_owner.pid); @@ -158,7 +159,7 @@ pid_t f_getown(struct file *filp) pid = -pid; } rcu_read_unlock(); - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock); return pid; }
@@ -208,7 +209,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) struct f_owner_ex owner = {}; int ret = 0;
- read_lock(&filp->f_owner.lock); + read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) owner.pid = pid_vnr(filp->f_owner.pid); @@ -231,7 +232,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) ret = -EINVAL; break; } - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock);
if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); @@ -249,10 +250,10 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) uid_t src[2]; int err;
- read_lock(&filp->f_owner.lock); + read_lock_irq(&filp->f_owner.lock); src[0] = from_kuid(user_ns, filp->f_owner.uid); src[1] = from_kuid(user_ns, filp->f_owner.euid); - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock);
err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]);
From: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com
[ Upstream commit 2f488f698fda820f8e6fa0407630154eceb145d6 ]
There is an existing lock hierarchy of &dev->event_lock --> &fasync_struct.fa_lock --> &f->f_owner.lock from the following call chain:
input_inject_event(): spin_lock_irqsave(&dev->event_lock,...); input_handle_event(): input_pass_values(): input_to_handler(): evdev_events(): evdev_pass_values(): spin_lock(&client->buffer_lock); __pass_event(): kill_fasync(): kill_fasync_rcu(): read_lock(&fa->fa_lock); send_sigio(): read_lock_irqsave(&fown->lock,...);
&dev->event_lock is HARDIRQ-safe, so interrupts have to be disabled while grabbing &fasync_struct.fa_lock, otherwise we invert the lock hierarchy. However, since kill_fasync which calls kill_fasync_rcu is an exported symbol, it may not necessarily be called with interrupts disabled.
As kill_fasync_rcu may be called with interrupts disabled (for example, in the call chain above), we replace calls to read_lock/read_unlock on &fasync_struct.fa_lock in kill_fasync_rcu with read_lock_irqsave/read_unlock_irqrestore.
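The distinction between this patch and the previous one is worth spelling out: the _irq variants suit callers known to run with interrupts enabled, while the _irqsave variants preserve the previous interrupt state and are safe when the function may already run with interrupts disabled. A minimal sketch:

static void example_reader(rwlock_t *lock)
{
        unsigned long flags;

        /* Safe in any context: saves, disables, then restores irqs. */
        read_lock_irqsave(lock, flags);
        /* ... read the protected state ... */
        read_unlock_irqrestore(lock, flags);
}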
Signed-off-by: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com Signed-off-by: Jeff Layton jlayton@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/fcntl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/fcntl.c b/fs/fcntl.c index 932ec1e9f5bf..68added37c15 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -1004,13 +1004,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; + unsigned long flags;
if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - read_lock(&fa->fa_lock); + read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -1019,7 +1020,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - read_unlock(&fa->fa_lock); + read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } }
From: Stian Skjelstad stian.skjelstad@gmail.com
[ Upstream commit 58bc6d1be2f3b0ceecb6027dfa17513ec6aa2abb ]
When parsing the ExtendedAttr data, a malicious or corrupt attribute length could cause kernel hangs and buffer overruns in some special cases.
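The hardened walk follows the usual pattern for length-prefixed records: the header must fit in the remaining buffer, and the declared length must be at least the header size and no more than what is left. A self-contained sketch of the pattern, using generic types rather than the UDF structures:

#include <stddef.h>
#include <stdint.h>

struct tlv {
        uint32_t type;
        uint32_t length;        /* total element size, header included */
};

static const struct tlv *find_tlv(const uint8_t *buf, size_t len,
                                  uint32_t type)
{
        size_t offset = 0;

        while (offset + sizeof(struct tlv) <= len) {
                const struct tlv *t = (const struct tlv *)(buf + offset);

                /* Reject undersized or overflowing elements. */
                if (t->length < sizeof(struct tlv) ||
                    t->length > len - offset)
                        break;
                if (t->type == type)
                        return t;
                offset += t->length;
        }
        return NULL;
}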
Link: https://lore.kernel.org/r/20210822093332.25234-1-stian.skjelstad@gmail.com Signed-off-by: Stian Skjelstad stian.skjelstad@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org --- fs/udf/misc.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/fs/udf/misc.c b/fs/udf/misc.c index eab94527340d..1614d308d0f0 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -173,13 +173,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, else offset = le32_to_cpu(eahd->appAttrLocation);
- while (offset < iinfo->i_lenEAttr) { + while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) { + uint32_t attrLength; + gaf = (struct genericFormat *)&ea[offset]; + attrLength = le32_to_cpu(gaf->attrLength); + + /* Detect undersized elements and buffer overflows */ + if ((attrLength < sizeof(*gaf)) || + (attrLength > (iinfo->i_lenEAttr - offset))) + break; + if (le32_to_cpu(gaf->attrType) == type && gaf->attrSubtype == subtype) return gaf; else - offset += le32_to_cpu(gaf->attrLength); + offset += attrLength; } }
From: Jens Axboe axboe@kernel.dk
[ Upstream commit d3e9f732c415cf22faa33d6f195e291ad82dc92e ]
Daniel reports that the v5.14-rc4-rt4 kernel throws a BUG when running stress-ng:
| [ 90.202543] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:35 | [ 90.202549] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2047, name: iou-wrk-2041 | [ 90.202555] CPU: 5 PID: 2047 Comm: iou-wrk-2041 Tainted: G W 5.14.0-rc4-rt4+ #89 | [ 90.202559] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 | [ 90.202561] Call Trace: | [ 90.202577] dump_stack_lvl+0x34/0x44 | [ 90.202584] ___might_sleep.cold+0x87/0x94 | [ 90.202588] rt_spin_lock+0x19/0x70 | [ 90.202593] ___slab_alloc+0xcb/0x7d0 | [ 90.202598] ? newidle_balance.constprop.0+0xf5/0x3b0 | [ 90.202603] ? dequeue_entity+0xc3/0x290 | [ 90.202605] ? io_wqe_dec_running.isra.0+0x98/0xe0 | [ 90.202610] ? pick_next_task_fair+0xb9/0x330 | [ 90.202612] ? __schedule+0x670/0x1410 | [ 90.202615] ? io_wqe_dec_running.isra.0+0x98/0xe0 | [ 90.202618] kmem_cache_alloc_trace+0x79/0x1f0 | [ 90.202621] io_wqe_dec_running.isra.0+0x98/0xe0 | [ 90.202625] io_wq_worker_sleeping+0x37/0x50 | [ 90.202628] schedule+0x30/0xd0 | [ 90.202630] schedule_timeout+0x8f/0x1a0 | [ 90.202634] ? __bpf_trace_tick_stop+0x10/0x10 | [ 90.202637] io_wqe_worker+0xfd/0x320 | [ 90.202641] ? finish_task_switch.isra.0+0xd3/0x290 | [ 90.202644] ? io_worker_handle_work+0x670/0x670 | [ 90.202646] ? io_worker_handle_work+0x670/0x670 | [ 90.202649] ret_from_fork+0x22/0x30
which is due to the RT kernel not liking a GFP_ATOMIC allocation inside a raw spinlock. Besides that not working on RT, doing any kind of allocation from inside schedule() is kind of nasty and should be avoided if at all possible.
This particular path happens when an io-wq worker goes to sleep, and we need a new worker to handle pending work. We currently allocate a small data item to hold the information we need to create a new worker, but we can instead include this data in the io_worker struct itself and just protect it with a single bit lock. We only really need one per worker anyway, as we will have run pending work between two sleep cycles.
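A sketch of the single-bit lock idiom, with an illustrative struct; bit 0 of create_state owns the embedded fields from the claim until the task_work has run:

struct worker_like {
        unsigned long create_state;     /* bit 0 guards the fields below */
        struct callback_head create_work;
        int create_index;
};

static bool example_claim(struct worker_like *w)
{
        /* Plain test_bit() is a cheap early-out before the atomic op. */
        if (test_bit(0, &w->create_state) ||
            test_and_set_bit_lock(0, &w->create_state))
                return false;
        return true;
}

static void example_release(struct worker_like *w)
{
        clear_bit_unlock(0, &w->create_state);
}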
https://lore.kernel.org/lkml/20210804082418.fbibprcwtzyt5qax@beryllium.lan/ Reported-by: Daniel Wagner dwagner@suse.de Tested-by: Daniel Wagner dwagner@suse.de Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/io-wq.c | 72 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 32 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c index 7d2ed8c7dd31..4ce83bb48021 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -51,6 +51,10 @@ struct io_worker {
struct completion ref_done;
+ unsigned long create_state; + struct callback_head create_work; + int create_index; + struct rcu_head rcu; };
@@ -272,24 +276,18 @@ static void io_wqe_inc_running(struct io_worker *worker) atomic_inc(&acct->nr_running); }
-struct create_worker_data { - struct callback_head work; - struct io_wqe *wqe; - int index; -}; - static void create_worker_cb(struct callback_head *cb) { - struct create_worker_data *cwd; + struct io_worker *worker; struct io_wq *wq; struct io_wqe *wqe; struct io_wqe_acct *acct; bool do_create = false, first = false;
- cwd = container_of(cb, struct create_worker_data, work); - wqe = cwd->wqe; + worker = container_of(cb, struct io_worker, create_work); + wqe = worker->wqe; wq = wqe->wq; - acct = &wqe->acct[cwd->index]; + acct = &wqe->acct[worker->create_index]; raw_spin_lock_irq(&wqe->lock); if (acct->nr_workers < acct->max_workers) { if (!acct->nr_workers) @@ -299,33 +297,42 @@ static void create_worker_cb(struct callback_head *cb) } raw_spin_unlock_irq(&wqe->lock); if (do_create) { - create_io_worker(wq, wqe, cwd->index, first); + create_io_worker(wq, wqe, worker->create_index, first); } else { atomic_dec(&acct->nr_running); io_worker_ref_put(wq); } - kfree(cwd); + clear_bit_unlock(0, &worker->create_state); + io_worker_release(worker); }
-static void io_queue_worker_create(struct io_wqe *wqe, struct io_wqe_acct *acct) +static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker, + struct io_wqe_acct *acct) { - struct create_worker_data *cwd; struct io_wq *wq = wqe->wq;
/* raced with exit, just ignore create call */ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) goto fail; + if (!io_worker_get(worker)) + goto fail; + /* + * create_state manages ownership of create_work/index. We should + * only need one entry per worker, as the worker going to sleep + * will trigger the condition, and waking will clear it once it + * runs the task_work. + */ + if (test_bit(0, &worker->create_state) || + test_and_set_bit_lock(0, &worker->create_state)) + goto fail_release;
- cwd = kmalloc(sizeof(*cwd), GFP_ATOMIC); - if (cwd) { - init_task_work(&cwd->work, create_worker_cb); - cwd->wqe = wqe; - cwd->index = acct->index; - if (!task_work_add(wq->task, &cwd->work, TWA_SIGNAL)) - return; - - kfree(cwd); - } + init_task_work(&worker->create_work, create_worker_cb); + worker->create_index = acct->index; + if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) + return; + clear_bit_unlock(0, &worker->create_state); +fail_release: + io_worker_release(worker); fail: atomic_dec(&acct->nr_running); io_worker_ref_put(wq); @@ -343,7 +350,7 @@ static void io_wqe_dec_running(struct io_worker *worker) if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) { atomic_inc(&acct->nr_running); atomic_inc(&wqe->wq->worker_refs); - io_queue_worker_create(wqe, acct); + io_queue_worker_create(wqe, worker, acct); } }
@@ -1004,12 +1011,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
static bool io_task_work_match(struct callback_head *cb, void *data) { - struct create_worker_data *cwd; + struct io_worker *worker;
if (cb->func != create_worker_cb) return false; - cwd = container_of(cb, struct create_worker_data, work); - return cwd->wqe->wq == data; + worker = container_of(cb, struct io_worker, create_work); + return worker->wqe->wq == data; }
void io_wq_exit_start(struct io_wq *wq) @@ -1026,12 +1033,13 @@ static void io_wq_exit_workers(struct io_wq *wq) return;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { - struct create_worker_data *cwd; + struct io_worker *worker;
- cwd = container_of(cb, struct create_worker_data, work); - atomic_dec(&cwd->wqe->acct[cwd->index].nr_running); + worker = container_of(cb, struct io_worker, create_work); + atomic_dec(&worker->wqe->acct[worker->create_index].nr_running); io_worker_ref_put(wq); - kfree(cwd); + clear_bit_unlock(0, &worker->create_state); + io_worker_release(worker); }
rcu_read_lock();
From: Alexander Gordeev agordeev@linux.ibm.com
[ Upstream commit ddd63c85ef67ea9ea7282ad35eafb6568047126e ]
It is currently possible to initialize a large PMD page when the address is not aligned on a page boundary.
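A sketch of the guard the patch moves up front: a large page is only used when the start address is segment-aligned and a full segment fits in the range:

static bool can_use_large_pmd(unsigned long address, unsigned long end)
{
        return IS_ALIGNED(address, PMD_SIZE) && end - address >= PMD_SIZE;
}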
Signed-off-by: Alexander Gordeev agordeev@linux.ibm.com Reviewed-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/mm/kasan_init.c | 41 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 21 deletions(-)
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index a0fdc6dc5f9d..cc3af046c14e 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -107,6 +107,9 @@ static void __init kasan_early_pgtable_populate(unsigned long address, sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; }
+ /* + * The first 1MB of 1:1 mapping is mapped with 4KB pages + */ while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -157,30 +160,26 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
pm_dir = pmd_offset(pu_dir, address); if (pmd_none(*pm_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PMD_SIZE) && + if (IS_ALIGNED(address, PMD_SIZE) && end - address >= PMD_SIZE) { - pmd_populate(&init_mm, pm_dir, - kasan_early_shadow_pte); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - /* the first megabyte of 1:1 is mapped with 4k pages */ - if (has_edat && address && end - address >= PMD_SIZE && - mode != POPULATE_ZERO_SHADOW) { - void *page; - - if (mode == POPULATE_ONE2ONE) { - page = (void *)address; - } else { - page = kasan_early_alloc_segment(); - memset(page, 0, _SEGMENT_SIZE); + if (mode == POPULATE_ZERO_SHADOW) { + pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte); + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } else if (has_edat && address) { + void *page; + + if (mode == POPULATE_ONE2ONE) { + page = (void *)address; + } else { + page = kasan_early_alloc_segment(); + memset(page, 0, _SEGMENT_SIZE); + } + pmd_val(*pm_dir) = __pa(page) | sgt_prot; + address = (address + PMD_SIZE) & PMD_MASK; + continue; } - pmd_val(*pm_dir) = __pa(page) | sgt_prot; - address = (address + PMD_SIZE) & PMD_MASK; - continue; } - pt_dir = kasan_early_pte_alloc(); pmd_populate(&init_mm, pm_dir, pt_dir); } else if (pmd_large(*pm_dir)) {
From: Niklas Schnelle schnelle@linux.ibm.com
[ Upstream commit f7addcdd527a6dddfebe20c358b87bdb95624612 ]
Currently clp_set_pci_fn() always returns 0 as long as the CLP request itself succeeds, even if the operation returns a response code other than CLP_RC_OK or CLP_RC_SETPCIFN_ALRDY. This is highly misleading because calling code assumes that a zero rc means the operation was successful.
Fix this by returning the response code or cc on failure with the exception of the special handling for CLP_RC_SETPCIFN_ALRDY. Also let's not assume that the returned function handle for CLP_RC_SETPCIFN_ALRDY is 0, we don't need it anyway.
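Callers then collapse the tri-state result into a Linux error; a sketch of the convention as applied in zpci_enable_device():

static int example_enable(struct zpci_dev *zdev)
{
        /* 0 = success, < 0 = Linux error, > 0 = platform response. */
        if (clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES))
                return -EIO;
        return 0;
}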
Reviewed-by: Matthew Rosato mjrosato@linux.ibm.com Signed-off-by: Niklas Schnelle schnelle@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/pci/pci.c | 7 ++++--- arch/s390/pci/pci_clp.c | 33 ++++++++++++++++----------------- 2 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8fcb7ecb7225..77cd965cffef 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -661,9 +661,10 @@ int zpci_enable_device(struct zpci_dev *zdev) { int rc;
- rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES); - if (rc) + if (clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES)) { + rc = -EIO; goto out; + }
rc = zpci_dma_init_device(zdev); if (rc) @@ -684,7 +685,7 @@ int zpci_disable_device(struct zpci_dev *zdev) * The zPCI function may already be disabled by the platform, this is * detected in clp_disable_fh() which becomes a no-op. */ - return clp_disable_fh(zdev); + return clp_disable_fh(zdev) ? -EIO : 0; }
/** diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d3331596ddbe..0a0e8b8293be 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -213,15 +213,19 @@ int clp_query_pci_fn(struct zpci_dev *zdev) }
static int clp_refresh_fh(u32 fid); -/* - * Enable/Disable a given PCI function and update its function handle if - * necessary +/** + * clp_set_pci_fn() - Execute a command on a PCI function + * @zdev: Function that will be affected + * @nr_dma_as: DMA address space number + * @command: The command code to execute + * + * Returns: 0 on success, < 0 for Linux errors (e.g. -ENOMEM), and + * > 0 for non-success platform responses */ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; int rc, retries = 100; - u32 fid = zdev->fid;
rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) @@ -245,17 +249,16 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
- if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { - zpci_err("Set PCI FN:\n"); - zpci_err_clp(rrb->response.hdr.rsp, rc); - } - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { zdev->fh = rrb->response.fh; - } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && - rrb->response.fh == 0) { + } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY) { /* Function is already in desired state - update handle */ - rc = clp_refresh_fh(fid); + rc = clp_refresh_fh(zdev->fid); + } else { + zpci_err("Set PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + if (!rc) + rc = rrb->response.hdr.rsp; } clp_free_block(rrb); return rc; @@ -301,17 +304,13 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN); zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); - if (rc) - goto out; - - if (zpci_use_mio(zdev)) { + if (!rc && zpci_use_mio(zdev)) { rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO); zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); if (rc) clp_disable_fh(zdev); } -out: return rc; }
From: Peter Oberparleiter oberpar@linux.ibm.com
[ Upstream commit 1204777867e8486a88dbb4793fe256b31ea05eeb ]
Any previously recorded s390dbf debug data is reset when a debug area is resized using the 'pages' sysfs attribute. This can make live-debugging unnecessarily complex.
Fix this by copying existing debug data to the newly allocated debug area when resizing.
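The copy starts at the source's current active entry - which in a full ring is the oldest one - and skips slots that were never written, recognizable by a zero timestamp. A self-contained sketch of that ring-copy pattern (generic types, illustrative only, not the kernel's):

struct entry { long long clock; char payload[32]; };

/*
 * Copy all written entries from a ring, oldest first. The active
 * index points at the slot to be overwritten next, i.e. the oldest
 * entry; slots with clock == 0 were never written and are skipped.
 */
static void ring_copy(const struct entry *ring, int n, int active,
		      void (*emit)(const struct entry *))
{
	int i = active;

	do {
		if (ring[i].clock != 0LL)
			emit(&ring[i]);
		i = (i + 1) % n;
	} while (i != active);
}

debug_events_append() in the diff below follows the same pattern, additionally truncating each copied entry to the smaller of the two entry sizes when source and destination geometries differ.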
Signed-off-by: Peter Oberparleiter oberpar@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kernel/debug.c | 74 ++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 21 deletions(-)
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 09b6c6402f9b..0dbe48f550ff 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/minmax.h> #include <linux/debugfs.h>
#include <asm/debug.h> @@ -92,6 +93,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf); static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, debug_sprintf_entry_t *curr_event); +static void debug_areas_swap(debug_info_t *a, debug_info_t *b); +static void debug_events_append(debug_info_t *dest, debug_info_t *src);
/* globals */
@@ -726,35 +729,28 @@ EXPORT_SYMBOL(debug_unregister); */ static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area) { - debug_entry_t ***new_areas; + debug_info_t *new_id; unsigned long flags; - int rc = 0;
if (!id || (nr_areas <= 0) || (pages_per_area < 0)) return -EINVAL; - if (pages_per_area > 0) { - new_areas = debug_areas_alloc(pages_per_area, nr_areas); - if (!new_areas) { - pr_info("Allocating memory for %i pages failed\n", - pages_per_area); - rc = -ENOMEM; - goto out; - } - } else { - new_areas = NULL; + + new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!new_id) { + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); + return -ENOMEM; } + spin_lock_irqsave(&id->lock, flags); - debug_areas_free(id); - id->areas = new_areas; - id->nr_areas = nr_areas; - id->pages_per_area = pages_per_area; - id->active_area = 0; - memset(id->active_entries, 0, sizeof(int)*id->nr_areas); - memset(id->active_pages, 0, sizeof(int)*id->nr_areas); + debug_events_append(new_id, id); + debug_areas_swap(new_id, id); + debug_info_free(new_id); spin_unlock_irqrestore(&id->lock, flags); pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area); -out: - return rc; + + return 0; }
/** @@ -821,6 +817,42 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) id->active_entries[id->active_area]); }
+/* Swap debug areas of a and b. */ +static void debug_areas_swap(debug_info_t *a, debug_info_t *b) +{ + swap(a->nr_areas, b->nr_areas); + swap(a->pages_per_area, b->pages_per_area); + swap(a->areas, b->areas); + swap(a->active_area, b->active_area); + swap(a->active_pages, b->active_pages); + swap(a->active_entries, b->active_entries); +} + +/* Append all debug events in active area from source to destination log. */ +static void debug_events_append(debug_info_t *dest, debug_info_t *src) +{ + debug_entry_t *from, *to, *last; + + if (!src->areas || !dest->areas) + return; + + /* Loop over all entries in src, starting with oldest. */ + from = get_active_entry(src); + last = from; + do { + if (from->clock != 0LL) { + to = get_active_entry(dest); + memset(to, 0, dest->entry_size); + memcpy(to, from, min(src->entry_size, + dest->entry_size)); + proceed_active_entry(dest); + } + + proceed_active_entry(src); + from = get_active_entry(src); + } while (from != last); +} + /* * debug_finish_entry: * - set timestamp, caller address, cpu number etc.
From: Peter Oberparleiter oberpar@linux.ibm.com
[ Upstream commit 9372a82892c2caa6bccab9a4081166fa769699f8 ]
Currently allocation and registration of s390dbf debug areas are tied together. As a result, a debug area cannot be unregistered and re-registered while any process has an associated debugfs file open.
Fix this by splitting alloc/release from register/unregister.
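In outline, the split separates the refcounted object lifetime from its debugfs visibility; condensed from the diff below (parameters and error handling elided):

	/* register: create the refcounted object, then publish it */
	id = debug_info_create(...);		/* ref_count = 1 */
	mutex_lock(&debug_mutex);
	_debug_register(id);			/* debugfs entries + list */
	mutex_unlock(&debug_mutex);

	/* unregister: unpublish under the mutex, drop the ref outside -
	 * readers with open debugfs files hold their own reference and
	 * keep the object alive until the last one is closed */
	mutex_lock(&debug_mutex);
	_debug_unregister(id);
	mutex_unlock(&debug_mutex);
	debug_info_put(id);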
Signed-off-by: Peter Oberparleiter oberpar@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kernel/debug.c | 102 +++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 46 deletions(-)
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 0dbe48f550ff..05b765b8038e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -314,24 +314,6 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, goto out;
rc->mode = mode & ~S_IFMT; - - /* create root directory */ - rc->debugfs_root_entry = debugfs_create_dir(rc->name, - debug_debugfs_root_entry); - - /* append new element to linked list */ - if (!debug_area_first) { - /* first element in list */ - debug_area_first = rc; - rc->prev = NULL; - } else { - /* append element to end of list */ - debug_area_last->next = rc; - rc->prev = debug_area_last; - } - debug_area_last = rc; - rc->next = NULL; - refcount_set(&rc->ref_count, 1); out: return rc; @@ -391,27 +373,10 @@ static void debug_info_get(debug_info_t *db_info) */ static void debug_info_put(debug_info_t *db_info) { - int i; - if (!db_info) return; - if (refcount_dec_and_test(&db_info->ref_count)) { - for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (!db_info->views[i]) - continue; - debugfs_remove(db_info->debugfs_entries[i]); - } - debugfs_remove(db_info->debugfs_root_entry); - if (db_info == debug_area_first) - debug_area_first = db_info->next; - if (db_info == debug_area_last) - debug_area_last = db_info->prev; - if (db_info->prev) - db_info->prev->next = db_info->next; - if (db_info->next) - db_info->next->prev = db_info->prev; + if (refcount_dec_and_test(&db_info->ref_count)) debug_info_free(db_info); - } }
/* @@ -635,6 +600,31 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ }
+/* Create debugfs entries and add to internal list. */ +static void _debug_register(debug_info_t *id) +{ + /* create root directory */ + id->debugfs_root_entry = debugfs_create_dir(id->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = id; + id->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = id; + id->prev = debug_area_last; + } + debug_area_last = id; + id->next = NULL; + + debug_register_view(id, &debug_level_view); + debug_register_view(id, &debug_flush_view); + debug_register_view(id, &debug_pages_view); +} + /** * debug_register_mode() - creates and initializes debug area. * @@ -664,19 +654,16 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, if ((uid != 0) || (gid != 0)) pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); - mutex_lock(&debug_mutex);
/* create new debug_info */ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); - if (!rc) - goto out; - debug_register_view(rc, &debug_level_view); - debug_register_view(rc, &debug_flush_view); - debug_register_view(rc, &debug_pages_view); -out: - if (!rc) + if (rc) { + mutex_lock(&debug_mutex); + _debug_register(rc); + mutex_unlock(&debug_mutex); + } else { pr_err("Registering debug feature %s failed\n", name); - mutex_unlock(&debug_mutex); + } return rc; } EXPORT_SYMBOL(debug_register_mode); @@ -705,6 +692,27 @@ debug_info_t *debug_register(const char *name, int pages_per_area, } EXPORT_SYMBOL(debug_register);
+/* Remove debugfs entries and remove from internal list. */ +static void _debug_unregister(debug_info_t *id) +{ + int i; + + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + continue; + debugfs_remove(id->debugfs_entries[i]); + } + debugfs_remove(id->debugfs_root_entry); + if (id == debug_area_first) + debug_area_first = id->next; + if (id == debug_area_last) + debug_area_last = id->prev; + if (id->prev) + id->prev->next = id->next; + if (id->next) + id->next->prev = id->prev; +} + /** * debug_unregister() - give back debug area. * @@ -718,8 +726,10 @@ void debug_unregister(debug_info_t *id) if (!id) return; mutex_lock(&debug_mutex); - debug_info_put(id); + _debug_unregister(id); mutex_unlock(&debug_mutex); + + debug_info_put(id); } EXPORT_SYMBOL(debug_unregister);
From: Harald Freudenberger freude@linux.ibm.com
[ Upstream commit cabebb697c98fb1f05cc950a747a9b6ec61a5b01 ]
If for any reason the interrupt enable for an ap queue fails, the state machine run for the queue returns wrong codes to the caller. The caller then assumes that interrupt support is enabled for this queue and thus does not re-establish the high resolution timer used for polling. In the end this leads to a hang for the user space process waiting "forever" for the reply.
This patch reworks these return codes so that the caller gets a correct indication and re-establishes the timer when a queue runs without interrupt support.
Please note that this fixes wrong behavior after a first attempt to enable interrupt support for the queue has failed. However, it looks like this occasionally happens on KVM systems.
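The crux of the fix is the state machine's wait decision: a queue without working interrupt support must request a timeout wait, so that ap_wait() re-arms the high-resolution poll timer, rather than an interrupt wait that would never be woken. Condensed from the diff below:

	/* Only promise an interrupt wakeup if interrupts are actually
	 * enabled on this queue; otherwise fall back to timer polling. */
	return aq->interrupt ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_TIMEOUT;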
Signed-off-by: Harald Freudenberger freude@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/crypto/ap_bus.c | 25 ++++++++----------------- drivers/s390/crypto/ap_bus.h | 10 ++-------- drivers/s390/crypto/ap_queue.c | 20 +++++++++++--------- 3 files changed, 21 insertions(+), 34 deletions(-)
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 8d3a1d84a757..9c4f3c388934 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -127,22 +127,13 @@ static struct bus_type ap_bus_type; /* Adapter interrupt definitions */ static void ap_interrupt_handler(struct airq_struct *airq, bool floating);
-static int ap_airq_flag; +static bool ap_irq_flag;
static struct airq_struct ap_airq = { .handler = ap_interrupt_handler, .isc = AP_ISC, };
-/** - * ap_using_interrupts() - Returns non-zero if interrupt support is - * available. - */ -static inline int ap_using_interrupts(void) -{ - return ap_airq_flag; -} - /** * ap_airq_ptr() - Get the address of the adapter interrupt indicator * @@ -152,7 +143,7 @@ static inline int ap_using_interrupts(void) */ void *ap_airq_ptr(void) { - if (ap_using_interrupts()) + if (ap_irq_flag) return ap_airq.lsi_ptr; return NULL; } @@ -396,7 +387,7 @@ void ap_wait(enum ap_sm_wait wait) switch (wait) { case AP_SM_WAIT_AGAIN: case AP_SM_WAIT_INTERRUPT: - if (ap_using_interrupts()) + if (ap_irq_flag) break; if (ap_poll_kthread) { wake_up(&ap_poll_wait); @@ -471,7 +462,7 @@ static void ap_tasklet_fn(unsigned long dummy) * be received. Doing it in the beginning of the tasklet is therefor * important that no requests on any AP get lost. */ - if (ap_using_interrupts()) + if (ap_irq_flag) xchg(ap_airq.lsi_ptr, 0);
spin_lock_bh(&ap_queues_lock); @@ -541,7 +532,7 @@ static int ap_poll_thread_start(void) { int rc;
- if (ap_using_interrupts() || ap_poll_kthread) + if (ap_irq_flag || ap_poll_kthread) return 0; mutex_lock(&ap_poll_thread_mutex); ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll"); @@ -1187,7 +1178,7 @@ static BUS_ATTR_RO(ap_adapter_mask); static ssize_t ap_interrupts_show(struct bus_type *bus, char *buf) { return scnprintf(buf, PAGE_SIZE, "%d\n", - ap_using_interrupts() ? 1 : 0); + ap_irq_flag ? 1 : 0); }
static BUS_ATTR_RO(ap_interrupts); @@ -1912,7 +1903,7 @@ static int __init ap_module_init(void) /* enable interrupts if available */ if (ap_interrupts_available()) { rc = register_adapter_interrupt(&ap_airq); - ap_airq_flag = (rc == 0); + ap_irq_flag = (rc == 0); }
/* Create /sys/bus/ap. */ @@ -1956,7 +1947,7 @@ static int __init ap_module_init(void) out_bus: bus_unregister(&ap_bus_type); out: - if (ap_using_interrupts()) + if (ap_irq_flag) unregister_adapter_interrupt(&ap_airq); kfree(ap_qci_info); return rc; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 8f18abdbbc2b..6dd5e8f0380c 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -80,12 +80,6 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_FUNC_EP11 5 #define AP_FUNC_APXA 6
-/* - * AP interrupt states - */ -#define AP_INTR_DISABLED 0 /* AP interrupt disabled */ -#define AP_INTR_ENABLED 1 /* AP interrupt enabled */ - /* * AP queue state machine states */ @@ -112,7 +106,7 @@ enum ap_sm_event { * AP queue state wait behaviour */ enum ap_sm_wait { - AP_SM_WAIT_AGAIN, /* retry immediately */ + AP_SM_WAIT_AGAIN = 0, /* retry immediately */ AP_SM_WAIT_TIMEOUT, /* wait for timeout */ AP_SM_WAIT_INTERRUPT, /* wait for thin interrupt (if available) */ AP_SM_WAIT_NONE, /* no wait */ @@ -186,7 +180,7 @@ struct ap_queue { enum ap_dev_state dev_state; /* queue device state */ bool config; /* configured state */ ap_qid_t qid; /* AP queue id. */ - int interrupt; /* indicate if interrupts are enabled */ + bool interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ int pendingq_count; /* # requests on pendingq list. */ int requestq_count; /* # requests on requestq list. */ diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 669f96fddad6..d70c4d3d0907 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -19,7 +19,7 @@ static void __ap_flush_queue(struct ap_queue *aq);
/** - * ap_queue_enable_interruption(): Enable interruption on an AP queue. + * ap_queue_enable_irq(): Enable interrupt support on this AP queue. * @qid: The AP queue number * @ind: the notification indicator byte * @@ -27,7 +27,7 @@ static void __ap_flush_queue(struct ap_queue *aq); * value it waits a while and tests the AP queue if interrupts * have been switched on using ap_test_queue(). */ -static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind) +static int ap_queue_enable_irq(struct ap_queue *aq, void *ind) { struct ap_queue_status status; struct ap_qirq_ctrl qirqctrl = { 0 }; @@ -218,7 +218,8 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) return AP_SM_WAIT_NONE; case AP_RESPONSE_NO_PENDING_REPLY: if (aq->queue_count > 0) - return AP_SM_WAIT_INTERRUPT; + return aq->interrupt ? + AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_TIMEOUT; aq->sm_state = AP_SM_STATE_IDLE; return AP_SM_WAIT_NONE; default: @@ -272,7 +273,8 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) fallthrough; case AP_RESPONSE_Q_FULL: aq->sm_state = AP_SM_STATE_QUEUE_FULL; - return AP_SM_WAIT_INTERRUPT; + return aq->interrupt ? + AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_TIMEOUT; case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; return AP_SM_WAIT_TIMEOUT; @@ -322,7 +324,7 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; - aq->interrupt = AP_INTR_DISABLED; + aq->interrupt = false; return AP_SM_WAIT_TIMEOUT; default: aq->dev_state = AP_DEV_STATE_ERROR; @@ -355,7 +357,7 @@ static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq) switch (status.response_code) { case AP_RESPONSE_NORMAL: lsi_ptr = ap_airq_ptr(); - if (lsi_ptr && ap_queue_enable_interruption(aq, lsi_ptr) == 0) + if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0) aq->sm_state = AP_SM_STATE_SETIRQ_WAIT; else aq->sm_state = (aq->queue_count > 0) ? @@ -396,7 +398,7 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
if (status.irq_enabled == 1) { /* Irqs are now enabled */ - aq->interrupt = AP_INTR_ENABLED; + aq->interrupt = true; aq->sm_state = (aq->queue_count > 0) ? AP_SM_STATE_WORKING : AP_SM_STATE_IDLE; } @@ -586,7 +588,7 @@ static ssize_t interrupt_show(struct device *dev, spin_lock_bh(&aq->lock); if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) rc = scnprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n"); - else if (aq->interrupt == AP_INTR_ENABLED) + else if (aq->interrupt) rc = scnprintf(buf, PAGE_SIZE, "Interrupts enabled.\n"); else rc = scnprintf(buf, PAGE_SIZE, "Interrupts disabled.\n"); @@ -767,7 +769,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) aq->ap_dev.device.type = &ap_queue_type; aq->ap_dev.device_type = device_type; aq->qid = qid; - aq->interrupt = AP_INTR_DISABLED; + aq->interrupt = false; spin_lock_init(&aq->lock); INIT_LIST_HEAD(&aq->pendingq); INIT_LIST_HEAD(&aq->requestq);
From: Alexander Gordeev agordeev@linux.ibm.com
[ Upstream commit 915fea04f9320d0f4ab6ecbb6bf759eebcd2c41d ]
The restart interrupt is triggered whenever a secondary CPU is brought online, a remote function call is dispatched from another CPU, or a manual PSW restart is initiated and causes the system to kdump. The handling routine is always called with DAT turned off. It then initializes the stack frame and invokes a callback.
The existing callbacks handle DAT as follows:
* __do_restart() and __machine_kexec() turn it on upon entry; * __ipl_run(), __reipl_run() and __dump_run() do not turn it on right away, but all of them call diag308() - which turns DAT on, but only if kasan is enabled;
In addition to the described complexity, all callbacks (and the functions they call) should avoid kasan instrumentation while DAT is off.
This update enables DAT in the assembler restart handler and relieves any callbacks (which are mostly C functions) from dealing with DAT altogether.
There are four types of CPU restart that initialize control registers in different ways:
1. Start of secondary CPU on boot - control registers are inherited from the IPL CPU; 2. Restart of online CPU - control registers of the CPU being restarted are kept; 3. Hotplug of offline CPU - control registers are inherited from the starting CPU; 4. Start of offline CPU triggered by manual PSW restart - the control registers are read from the absolute lowcore and contain the boot time IPL CPU values updated with all follow-up calls of smp_ctl_set_bit() and smp_ctl_clear_bit() routines;
In the first three cases the contents of the control registers are the most recent. In the latter case the control registers are good enough to facilitate successful completion of the kdump operation.
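Case 4 depends on the absolute lowcore copy of the control registers staying current. To that end the diff below turns smp_ctl_set_bit()/smp_ctl_clear_bit() into a locked read-modify-write of that copy before broadcasting the change to the online CPUs; in outline:

	spin_lock(&ctl_lock);
	/* update the boot-time register image in the absolute lowcore so
	 * a CPU started via manual PSW restart loads current values */
	memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
	__set_bit(bit, &ctlreg);
	memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
	spin_unlock(&ctl_lock);
	on_each_cpu(smp_ctl_bit_callback, &parms, 1);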
Suggested-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Alexander Gordeev agordeev@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/include/asm/lowcore.h | 3 ++- arch/s390/include/asm/processor.h | 2 ++ arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 11 +++++++---- arch/s390/kernel/ipl.c | 3 --- arch/s390/kernel/machine_kexec.c | 1 - arch/s390/kernel/setup.c | 9 ++++++++- arch/s390/kernel/smp.c | 31 ++++++++++++++++++++++--------- 8 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 47bde5a20a41..11213c8bfca5 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,8 @@ struct lowcore { /* Restart function and parameter. */ __u64 restart_fn; /* 0x0370 */ __u64 restart_data; /* 0x0378 */ - __u64 restart_source; /* 0x0380 */ + __u32 restart_source; /* 0x0380 */ + __u32 restart_flags; /* 0x0384 */
/* Address space pointer. */ __u64 kernel_asce; /* 0x0388 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index ddc7858bbce4..879b8e3f609c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -26,6 +26,8 @@ #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) #define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU)
+#define RESTART_FLAG_CTLREGS _AC(1 << 0, U) + #ifndef __ASSEMBLY__
#include <linux/cpumask.h> diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 77ff2130cb04..dc53b0452ce2 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -116,6 +116,7 @@ int main(void) OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags); OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_LPP, lowcore, lpp); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5a2f70cbd3a9..b9716a7e326d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -624,12 +624,15 @@ ENTRY(mcck_int_handler) 4: j 4b ENDPROC(mcck_int_handler)
-# -# PSW restart interrupt handler -# ENTRY(restart_int_handler) ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART + TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 + jz 0f + la %r15,4095 + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r15) +0: larl %r15,.Lstosm_tmp + stosm 0(%r15),0x04 # turn dat on, keep irqs off lg %r15,__LC_RESTART_STACK xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -638,7 +641,7 @@ ENTRY(restart_int_handler) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) lg %r1,__LC_RESTART_FN # load fn, parm & source cpu lg %r2,__LC_RESTART_DATA - lg %r3,__LC_RESTART_SOURCE + lgf %r3,__LC_RESTART_SOURCE ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 50e2c21e0ec9..911cd3912351 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -179,8 +179,6 @@ static inline int __diag308(unsigned long subcode, void *addr)
int diag308(unsigned long subcode, void *addr) { - if (IS_ENABLED(CONFIG_KASAN)) - __arch_local_irq_stosm(0x04); /* enable DAT */ diag_stat_inc(DIAG_STAT_X308); return __diag308(subcode, addr); } @@ -1843,7 +1841,6 @@ static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
static void __do_restart(void *ignore) { - __arch_local_irq_stosm(0x04); /* enable DAT */ smp_send_stop(); #ifdef CONFIG_CRASH_DUMP crash_kexec(NULL); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 1005a6935fbe..c1fbc979e0e8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -263,7 +263,6 @@ static void __do_machine_kexec(void *data) */ static void __machine_kexec(void *data) { - __arch_local_irq_stosm(0x04); /* enable DAT */ pfault_fini(); tracing_off(); debug_locks_off(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ff0f9e838916..ee23908f1b96 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -421,7 +421,7 @@ static void __init setup_lowcore_dat_off(void) lc->restart_stack = (unsigned long) restart_stack; lc->restart_fn = (unsigned long) do_restart; lc->restart_data = 0; - lc->restart_source = -1UL; + lc->restart_source = -1U;
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); if (!mcck_stack) @@ -450,12 +450,19 @@ static void __init setup_lowcore_dat_off(void)
static void __init setup_lowcore_dat_on(void) { + struct lowcore *lc = lowcore_ptr[0]; + __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; + __ctl_store(S390_lowcore.cregs_save_area, 0, 15); __ctl_set_bit(0, 28); + mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS); + mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw); + memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area, + sizeof(S390_lowcore.cregs_save_area)); }
static struct resource code_resource = { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8984711f72ed..8e8ace899407 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -252,6 +252,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->restart_flags = RESTART_FLAG_CTLREGS; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; @@ -297,7 +298,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) lc->restart_stack = lc->nodat_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; - lc->restart_source = -1UL; + lc->restart_source = -1U; pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); }
@@ -311,12 +312,12 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ }
-static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu, - pcpu_delegate_fn *func, - void *data, unsigned long stack) +static void pcpu_delegate(struct pcpu *pcpu, + pcpu_delegate_fn *func, + void *data, unsigned long stack) { struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; - unsigned long source_cpu = stap(); + unsigned int source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); if (pcpu->address == source_cpu) { @@ -569,6 +570,9 @@ static void smp_ctl_bit_callback(void *info) __ctl_load(cregs, 0, 15); }
+static DEFINE_SPINLOCK(ctl_lock); +static unsigned long ctlreg; + /* * Set a bit in a control register of all cpus */ @@ -576,6 +580,11 @@ void smp_ctl_set_bit(int cr, int bit) { struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
+ spin_lock(&ctl_lock); + memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); + __set_bit(bit, &ctlreg); + memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); + spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -587,6 +596,11 @@ void smp_ctl_clear_bit(int cr, int bit) { struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
+ spin_lock(&ctl_lock); + memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); + __clear_bit(bit, &ctlreg); + memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); + spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); @@ -895,14 +909,13 @@ static void smp_init_secondary(void) /* * Activate a secondary processor. */ -static void __no_sanitize_address smp_start_secondary(void *cpuvoid) +static void smp_start_secondary(void *cpuvoid) { S390_lowcore.restart_stack = (unsigned long) restart_stack; S390_lowcore.restart_fn = (unsigned long) do_restart; S390_lowcore.restart_data = 0; - S390_lowcore.restart_source = -1UL; - __ctl_load(S390_lowcore.cregs_save_area, 0, 15); - __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); + S390_lowcore.restart_source = -1U; + S390_lowcore.restart_flags = 0; call_on_stack_noreturn(smp_init_secondary, S390_lowcore.kernel_stack); }