From: Charles Keepax ckeepax@opensource.cirrus.com
[ Upstream commit 0e793ba77c18382f08e440260fe72bc6fce2a3cb ]
Currently, the SPI core doesn't set the struct device fwnode pointer when it creates a new SPI device. This means when the device is registered the fwnode is NULL and the check in device_add which sets the fwnode->dev pointer is skipped. This wasn't previously an issue, however these two patches:
commit 4731210c09f5 ("gpiolib: Bind gpio_device to a driver to enable fw_devlink=on by default") commit ced2af419528 ("gpiolib: Don't probe gpio_device if it's not the primary device")
Added some code to the GPIO core which relies on using that fwnode->dev pointer to determine if a driver is bound to the fwnode and if not bind a stub GPIO driver. This means the GPIO providers behind SPI will get both the expected driver and this stub driver causing the stub driver to fail if it attempts to request any pin configuration. For example on my system:
madera-pinctrl madera-pinctrl: pin gpio5 already requested by madera-pinctrl; cannot claim for gpiochip3 madera-pinctrl madera-pinctrl: pin-4 (gpiochip3) status -22 madera-pinctrl madera-pinctrl: could not request pin 4 (gpio5) from group aif1 on device madera-pinctrl gpio_stub_drv gpiochip3: Error applying setting, reverse things back gpio_stub_drv: probe of gpiochip3 failed with error -22
The firmware node on the device created by the GPIO framework is set through the of_node pointer hence things generally actually work, however that fwnode->dev is never set, as the check was skipped at device_add time. This fix appears to match how the I2C subsystem handles the same situation.
Signed-off-by: Charles Keepax ckeepax@opensource.cirrus.com Link: https://lore.kernel.org/r/20210421101402.8468-1-ckeepax@opensource.cirrus.co... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index e353b7a9e54e..d83f91a6ab13 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2057,6 +2057,7 @@ of_register_spi_device(struct spi_controller *ctlr, struct device_node *nc) /* Store a pointer to the node in the device structure */ of_node_get(nc); spi->dev.of_node = nc; + spi->dev.fwnode = of_fwnode_handle(nc);
/* Register the new device */ rc = spi_add_device(spi);
From: Alexey Gladkov legion@kernel.org
[ Upstream commit 905ae01c4ae2ae3df05bb141801b1db4b7d83c61 ]
For RLIMIT_NPROC and some other rlimits the user_struct that holds the global limit is kept alive for the lifetime of a process by keeping it in struct cred. Adding a pointer to ucounts in the struct cred will allow to track RLIMIT_NPROC not only for user in the system, but for user in the user_namespace.
Updating ucounts may require memory allocation which may fail. So, we cannot change cred.ucounts in the commit_creds() because this function cannot fail and it should always return 0. For this reason, we modify cred.ucounts before calling the commit_creds().
Changelog
v6: * Fix null-ptr-deref in is_ucounts_overlimit() detected by trinity. This error was caused by the fact that cred_alloc_blank() left the ucounts pointer empty.
Reported-by: kernel test robot oliver.sang@intel.com Signed-off-by: Alexey Gladkov legion@kernel.org Link: https://lkml.kernel.org/r/b37aaef28d8b9b0d757e07ba6dd27281bbe39259.161909442... Signed-off-by: Eric W. Biederman ebiederm@xmission.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/exec.c | 4 ++++ include/linux/cred.h | 2 ++ include/linux/user_namespace.h | 4 ++++ kernel/cred.c | 40 ++++++++++++++++++++++++++++++++++ kernel/fork.c | 6 +++++ kernel/sys.c | 12 ++++++++++ kernel/ucount.c | 40 +++++++++++++++++++++++++++++++--- kernel/user_namespace.c | 3 +++ 8 files changed, 108 insertions(+), 3 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c index 18594f11c31f..d7c4187ca023 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1360,6 +1360,10 @@ int begin_new_exec(struct linux_binprm * bprm) WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1); flush_signal_handlers(me, 0);
+ retval = set_cred_ucounts(bprm->cred); + if (retval < 0) + goto out_unlock; + /* * install the new credentials for this executable */ diff --git a/include/linux/cred.h b/include/linux/cred.h index 14971322e1a0..65014e50d5fa 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -143,6 +143,7 @@ struct cred { #endif struct user_struct *user; /* real user ID subscription */ struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ + struct ucounts *ucounts; struct group_info *group_info; /* supplementary groups for euid/fsgid */ /* RCU deletion */ union { @@ -169,6 +170,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); +extern int set_cred_ucounts(struct cred *);
/* * check for validity of credentials diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 1d08dbbcfe32..bfa6463f8a95 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -104,11 +104,15 @@ struct ucounts { };
extern struct user_namespace init_user_ns; +extern struct ucounts init_ucounts;
bool setup_userns_sysctls(struct user_namespace *ns); void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); +struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); +struct ucounts *get_ucounts(struct ucounts *ucounts); +void put_ucounts(struct ucounts *ucounts);
#ifdef CONFIG_USER_NS
diff --git a/kernel/cred.c b/kernel/cred.c index e1d274cd741b..642d62f7b46d 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -60,6 +60,7 @@ struct cred init_cred = { .user = INIT_USER, .user_ns = &init_user_ns, .group_info = &init_groups, + .ucounts = &init_ucounts, };
static inline void set_cred_subscribers(struct cred *cred, int n) @@ -119,6 +120,8 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); + if (cred->ucounts) + put_ucounts(cred->ucounts); put_user_ns(cred->user_ns); kmem_cache_free(cred_jar, cred); } @@ -222,6 +225,7 @@ struct cred *cred_alloc_blank(void) #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; #endif + new->ucounts = get_ucounts(&init_ucounts);
if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -284,6 +288,11 @@ struct cred *prepare_creds(void)
if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; + + new->ucounts = get_ucounts(new->ucounts); + if (!new->ucounts) + goto error; + validate_creds(new); return new;
@@ -363,6 +372,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) ret = create_user_ns(new); if (ret < 0) goto error_put; + if (set_cred_ucounts(new) < 0) + goto error_put; }
#ifdef CONFIG_KEYS @@ -653,6 +664,31 @@ int cred_fscmp(const struct cred *a, const struct cred *b) } EXPORT_SYMBOL(cred_fscmp);
+int set_cred_ucounts(struct cred *new) +{ + struct task_struct *task = current; + const struct cred *old = task->real_cred; + struct ucounts *old_ucounts = new->ucounts; + + if (new->user == old->user && new->user_ns == old->user_ns) + return 0; + + /* + * This optimization is needed because alloc_ucounts() uses locks + * for table lookups. + */ + if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) + return 0; + + if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid))) + return -EAGAIN; + + if (old_ucounts) + put_ucounts(old_ucounts); + + return 0; +} + /* * initialise the credentials stuff */ @@ -719,6 +755,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error;
+ new->ucounts = get_ucounts(new->ucounts); + if (!new->ucounts) + goto error; + put_cred(old); validate_creds(new); return new; diff --git a/kernel/fork.c b/kernel/fork.c index a070caed5c8e..24a689df61c9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2997,6 +2997,12 @@ int ksys_unshare(unsigned long unshare_flags) if (err) goto bad_unshare_cleanup_cred;
+ if (new_cred) { + err = set_cred_ucounts(new_cred); + if (err) + goto bad_unshare_cleanup_cred; + } + if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { if (do_sysvsem) { /* diff --git a/kernel/sys.c b/kernel/sys.c index 3a583a29815f..142ee040f573 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -558,6 +558,10 @@ long __sys_setreuid(uid_t ruid, uid_t euid) if (retval < 0) goto error;
+ retval = set_cred_ucounts(new); + if (retval < 0) + goto error; + return commit_creds(new);
error: @@ -616,6 +620,10 @@ long __sys_setuid(uid_t uid) if (retval < 0) goto error;
+ retval = set_cred_ucounts(new); + if (retval < 0) + goto error; + return commit_creds(new);
error: @@ -691,6 +699,10 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (retval < 0) goto error;
+ retval = set_cred_ucounts(new); + if (retval < 0) + goto error; + return commit_creds(new);
error: diff --git a/kernel/ucount.c b/kernel/ucount.c index 8d8874f1c35e..1f4455874aa0 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -8,6 +8,12 @@ #include <linux/kmemleak.h> #include <linux/user_namespace.h>
+struct ucounts init_ucounts = { + .ns = &init_user_ns, + .uid = GLOBAL_ROOT_UID, + .count = 1, +}; + #define UCOUNTS_HASHTABLE_BITS 10 static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)]; static DEFINE_SPINLOCK(ucounts_lock); @@ -129,7 +135,15 @@ static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struc return NULL; }
-static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) +static void hlist_add_ucounts(struct ucounts *ucounts) +{ + struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); + spin_lock_irq(&ucounts_lock); + hlist_add_head(&ucounts->node, hashent); + spin_unlock_irq(&ucounts_lock); +} + +struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; @@ -164,7 +178,26 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) return ucounts; }
-static void put_ucounts(struct ucounts *ucounts) +struct ucounts *get_ucounts(struct ucounts *ucounts) +{ + unsigned long flags; + + if (!ucounts) + return NULL; + + spin_lock_irqsave(&ucounts_lock, flags); + if (ucounts->count == INT_MAX) { + WARN_ONCE(1, "ucounts: counter has reached its maximum value"); + ucounts = NULL; + } else { + ucounts->count += 1; + } + spin_unlock_irqrestore(&ucounts_lock, flags); + + return ucounts; +} + +void put_ucounts(struct ucounts *ucounts) { unsigned long flags;
@@ -198,7 +231,7 @@ struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, { struct ucounts *ucounts, *iter, *bad; struct user_namespace *tns; - ucounts = get_ucounts(ns, uid); + ucounts = alloc_ucounts(ns, uid); for (iter = ucounts; iter; iter = tns->ucounts) { int max; tns = iter->ns; @@ -241,6 +274,7 @@ static __init int user_namespace_sysctl_init(void) BUG_ON(!user_header); BUG_ON(!setup_userns_sysctls(&init_user_ns)); #endif + hlist_add_ucounts(&init_ucounts); return 0; } subsys_initcall(user_namespace_sysctl_init); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 8d62863721b0..27670ab7a4ed 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -1340,6 +1340,9 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns) put_user_ns(cred->user_ns); set_cred_user_ns(cred, get_user_ns(user_ns));
+ if (set_cred_ucounts(cred) < 0) + return -EINVAL; + return 0; }
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit e90812c47b958407b54d05780dc483fdc1b57a93 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
Reviewed-by: Ezequiel Garcia ezequiel@collabora.com Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/media/rkvdec/rkvdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index d821661d30f3..8c17615f3a7a 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -658,7 +658,7 @@ static void rkvdec_device_run(void *priv) if (WARN_ON(!desc)) return;
- ret = pm_runtime_get_sync(rkvdec->dev); + ret = pm_runtime_resume_and_get(rkvdec->dev); if (ret < 0) { rkvdec_job_finish_no_pm(ctx, VB2_BUF_STATE_ERROR); return;
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 62c90446868b439929cb04395f04a709a64ae04b ]
The PM runtime get logic is currently broken, as it checks if ret is zero instead of checking if it is an error code, as reported by Dan Carpenter.
While here, use the pm_runtime_resume_and_get() as added by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") added pm_runtime_resume_and_get() in order to automatically handle dev->power.usage_count decrement on errors. As a bonus, such function always return zero on success.
It should also be noticed that a fail of pm_runtime_get_sync() would potentially result in a spurious runtime_suspend(), instead of using pm_runtime_put_noidle().
Reported-by: Dan Carpenter dan.carpenter@oracle.com Reviewed-by: Daniele Alessandrelli daniele.alessandrelli@intel.com Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/i2c/imx334.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/media/i2c/imx334.c b/drivers/media/i2c/imx334.c index 047aa7658d21..23f28606e570 100644 --- a/drivers/media/i2c/imx334.c +++ b/drivers/media/i2c/imx334.c @@ -717,9 +717,9 @@ static int imx334_set_stream(struct v4l2_subdev *sd, int enable) }
if (enable) { - ret = pm_runtime_get_sync(imx334->dev); - if (ret) - goto error_power_off; + ret = pm_runtime_resume_and_get(imx334->dev); + if (ret < 0) + goto error_unlock;
ret = imx334_start_streaming(imx334); if (ret) @@ -737,6 +737,7 @@ static int imx334_set_stream(struct v4l2_subdev *sd, int enable)
error_power_off: pm_runtime_put(imx334->dev); +error_unlock: mutex_unlock(&imx334->mutex);
return ret;
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit e7c617cab7a522fba5b20f9033ee98565b6f3546 ]
Calling pm_runtime_get_sync() is bad, since even when it returns an error, pm_runtime_put*() should be called. So, use instead pm_runtime_resume_and_get().
While here, ensure that the error condition will be checked during clock enable an media open() calls.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/marvell-ccic/mcam-core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index 141bf5d97a04..ea87110d9073 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -918,6 +918,7 @@ static int mclk_enable(struct clk_hw *hw) struct mcam_camera *cam = container_of(hw, struct mcam_camera, mclk_hw); int mclk_src; int mclk_div; + int ret;
/* * Clock the sensor appropriately. Controller clock should @@ -931,7 +932,9 @@ static int mclk_enable(struct clk_hw *hw) mclk_div = 2; }
- pm_runtime_get_sync(cam->dev); + ret = pm_runtime_resume_and_get(cam->dev); + if (ret < 0) + return ret; clk_enable(cam->clk[0]); mcam_reg_write(cam, REG_CLKCTRL, (mclk_src << 29) | mclk_div); mcam_ctlr_power_up(cam); @@ -1611,7 +1614,9 @@ static int mcam_v4l_open(struct file *filp) ret = sensor_call(cam, core, s_power, 1); if (ret) goto out; - pm_runtime_get_sync(cam->dev); + ret = pm_runtime_resume_and_get(cam->dev); + if (ret < 0) + goto out; __mcam_cam_reset(cam); mcam_set_config_needed(cam, 1); }
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit d07bb9702cf5f5ccf3fb661e6cab54bbc33cd23f ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
While here, fix the return contition of mtk_mdp_m2m_start_streaming(), as it doesn't make any sense to return 0 if the PM runtime failed to resume.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c index ace4528cdc5e..f14779e7596e 100644 --- a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c +++ b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c @@ -391,12 +391,12 @@ static int mtk_mdp_m2m_start_streaming(struct vb2_queue *q, unsigned int count) struct mtk_mdp_ctx *ctx = q->drv_priv; int ret;
- ret = pm_runtime_get_sync(&ctx->mdp_dev->pdev->dev); + ret = pm_runtime_resume_and_get(&ctx->mdp_dev->pdev->dev); if (ret < 0) - mtk_mdp_dbg(1, "[%d] pm_runtime_get_sync failed:%d", + mtk_mdp_dbg(1, "[%d] pm_runtime_resume_and_get failed:%d", ctx->id, ret);
- return 0; + return ret; }
static void *mtk_mdp_m2m_buf_remove(struct mtk_mdp_ctx *ctx,
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit fdc34e82c0f968ac4c157bd3d8c299ebc24c9c63 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
While here, check if the PM runtime error was caught at s5p_cec_adap_enable().
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Reviewed-by: Sylwester Nawrocki s.nawrocki@samsung.com Acked-by: Marek Szyprowski m.szyprowski@samsung.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/cec/platform/s5p/s5p_cec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c index 2a3e7ffefe0a..2250c1cbc64e 100644 --- a/drivers/media/cec/platform/s5p/s5p_cec.c +++ b/drivers/media/cec/platform/s5p/s5p_cec.c @@ -35,10 +35,13 @@ MODULE_PARM_DESC(debug, "debug level (0-2)");
static int s5p_cec_adap_enable(struct cec_adapter *adap, bool enable) { + int ret; struct s5p_cec_dev *cec = cec_get_drvdata(adap);
if (enable) { - pm_runtime_get_sync(cec->dev); + ret = pm_runtime_resume_and_get(cec->dev); + if (ret < 0) + return ret;
s5p_cec_reset(cec);
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit c41e02493334985cca1a22efd5ca962ce3abb061 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
While here, ensure that the driver will check if PM runtime resumed at vpfe_initialize_device().
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/am437x/am437x-vpfe.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index 6cdc77dda0e4..1c9cb9e05fdf 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1021,7 +1021,9 @@ static int vpfe_initialize_device(struct vpfe_device *vpfe) if (ret) return ret;
- pm_runtime_get_sync(vpfe->pdev); + ret = pm_runtime_resume_and_get(vpfe->pdev); + if (ret < 0) + return ret;
vpfe_config_enable(&vpfe->ccdc, 1);
@@ -2443,7 +2445,11 @@ static int vpfe_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev);
/* for now just enable it here instead of waiting for the open */ - pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { + vpfe_err(vpfe, "Unable to resume device.\n"); + goto probe_out_v4l2_unregister; + }
vpfe_ccdc_config_defaults(ccdc);
@@ -2530,6 +2536,11 @@ static int vpfe_suspend(struct device *dev)
/* only do full suspend if streaming has started */ if (vb2_start_streaming_called(&vpfe->buffer_queue)) { + /* + * ignore RPM resume errors here, as it is already too late. + * A check like that should happen earlier, either at + * open() or just before start streaming. + */ pm_runtime_get_sync(dev); vpfe_config_enable(ccdc, 1);
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 6e8b1526db164c9d4b9dacfb9bc48e365d7c4860 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
While here, check if the PM runtime error was caught at open time.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/sh_vou.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/media/platform/sh_vou.c b/drivers/media/platform/sh_vou.c index 4ac48441f22c..ca4310e26c49 100644 --- a/drivers/media/platform/sh_vou.c +++ b/drivers/media/platform/sh_vou.c @@ -1133,7 +1133,11 @@ static int sh_vou_open(struct file *file) if (v4l2_fh_is_singular_file(file) && vou_dev->status == SH_VOU_INITIALISING) { /* First open */ - pm_runtime_get_sync(vou_dev->v4l2_dev.dev); + err = pm_runtime_resume_and_get(vou_dev->v4l2_dev.dev); + if (err < 0) { + v4l2_fh_release(file); + goto done_open; + } err = sh_vou_hw_init(vou_dev); if (err < 0) { pm_runtime_put(vou_dev->v4l2_dev.dev);
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 908711f542c17fe61e5d653da1beb8e5ab5c7b50 ]
Currently, the driver just assumes that PM runtime logic succeded resuming the device.
That may not be the case, as pm_runtime_get_sync() can fail (but keeping the usage count incremented).
Replace the code to use pm_runtime_resume_and_get(), and letting it return the error code.
This way, if mtk_vcodec_dec_pw_on() fails, the logic under fops_vcodec_open() will do the right thing and return an error, instead of just assuming that the device is ready to be used.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c | 4 +++- drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c | 8 +++++--- drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c index 147dfef1638d..f87dc47d9e63 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c @@ -126,7 +126,9 @@ static int fops_vcodec_open(struct file *file) mtk_vcodec_dec_set_default_params(ctx);
if (v4l2_fh_is_singular(&ctx->fh)) { - mtk_vcodec_dec_pw_on(&dev->pm); + ret = mtk_vcodec_dec_pw_on(&dev->pm); + if (ret < 0) + goto err_load_fw; /* * Does nothing if firmware was already loaded. */ diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c index ddee7046ce42..6038db96f71c 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c @@ -88,13 +88,15 @@ void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev) put_device(dev->pm.larbvdec); }
-void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm) +int mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm) { int ret;
- ret = pm_runtime_get_sync(pm->dev); + ret = pm_runtime_resume_and_get(pm->dev); if (ret) - mtk_v4l2_err("pm_runtime_get_sync fail %d", ret); + mtk_v4l2_err("pm_runtime_resume_and_get fail %d", ret); + + return ret; }
void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h index 872d8bf8cfaf..280aeaefdb65 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h @@ -12,7 +12,7 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *dev); void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev);
-void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm); +int mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm); void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm); void mtk_vcodec_dec_clock_on(struct mtk_vcodec_pm *pm); void mtk_vcodec_dec_clock_off(struct mtk_vcodec_pm *pm);
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 10343de268d10cf07b092b8b525e12ad558ead77 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
As a plus, pm_runtime_resume_and_get() doesn't return positive numbers, so the return code validation can be removed.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Reviewed-by: Sylwester Nawrocki s.nawrocki@samsung.com Acked-by: Andrzej Pietrasiewicz andrzejtp2010@gmail.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/s5p-jpeg/jpeg-core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 026111505f5a..d402e456f27d 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -2566,11 +2566,8 @@ static void s5p_jpeg_buf_queue(struct vb2_buffer *vb) static int s5p_jpeg_start_streaming(struct vb2_queue *q, unsigned int count) { struct s5p_jpeg_ctx *ctx = vb2_get_drv_priv(q); - int ret; - - ret = pm_runtime_get_sync(ctx->jpeg->dev);
- return ret > 0 ? 0 : ret; + return pm_runtime_resume_and_get(ctx->jpeg->dev); }
static void s5p_jpeg_stop_streaming(struct vb2_queue *q)
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 9c298f82d8392f799a0595f50076afa1d91e9092 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c b/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c index 3f81dd17755c..fbcca59a0517 100644 --- a/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c +++ b/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c @@ -494,7 +494,7 @@ static int rotate_start_streaming(struct vb2_queue *vq, unsigned int count) struct device *dev = ctx->dev->dev; int ret;
- ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "Failed to enable module\n");
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit c44eac5b72e23c31eefc0e10a71d9650036b8341 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors.
The bdisp_start_streaming() doesn't take it into account, which would unbalance PM usage counter at bdisp_stop_streaming().
The logic at bdisp_probe() is correct, but the best is to use the same call along the driver.
So, replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c index 060ca85f64d5..85288da9d2ae 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c +++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c @@ -499,7 +499,7 @@ static int bdisp_start_streaming(struct vb2_queue *q, unsigned int count) { struct bdisp_ctx *ctx = q->drv_priv; struct vb2_v4l2_buffer *buf; - int ret = pm_runtime_get_sync(ctx->bdisp_dev->dev); + int ret = pm_runtime_resume_and_get(ctx->bdisp_dev->dev);
if (ret < 0) { dev_err(ctx->bdisp_dev->dev, "failed to set runtime PM\n"); @@ -1364,10 +1364,10 @@ static int bdisp_probe(struct platform_device *pdev)
/* Power management */ pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "failed to set PM\n"); - goto err_pm; + goto err_remove; }
/* Filters */ @@ -1395,6 +1395,7 @@ static int bdisp_probe(struct platform_device *pdev) bdisp_hw_free_filters(bdisp->dev); err_pm: pm_runtime_put(dev); +err_remove: bdisp_debugfs_remove(bdisp); v4l2_device_unregister(&bdisp->v4l2_dev); err_clk:
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 59f96244af9403ddf4810ec5c0fbe8920857634e ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors.
On some places, this is ok, but on others the usage count ended being unbalanced on failures.
Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
As a bonus, such function always return zero on success. So, some code can be simplified.
Reviewed-by: Sylwester Nawrocki s.nawrocki@samsung.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/exynos4-is/fimc-capture.c | 6 ++---- drivers/media/platform/exynos4-is/fimc-is.c | 4 ++-- drivers/media/platform/exynos4-is/fimc-isp-video.c | 3 +-- drivers/media/platform/exynos4-is/fimc-isp.c | 7 +++---- drivers/media/platform/exynos4-is/fimc-lite.c | 5 +++-- drivers/media/platform/exynos4-is/fimc-m2m.c | 5 +---- drivers/media/platform/exynos4-is/media-dev.c | 9 +++------ drivers/media/platform/exynos4-is/mipi-csis.c | 10 ++++------ 8 files changed, 19 insertions(+), 30 deletions(-)
diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c index 13c838d3f947..0da36443173c 100644 --- a/drivers/media/platform/exynos4-is/fimc-capture.c +++ b/drivers/media/platform/exynos4-is/fimc-capture.c @@ -478,11 +478,9 @@ static int fimc_capture_open(struct file *file) goto unlock;
set_bit(ST_CAPT_BUSY, &fimc->state); - ret = pm_runtime_get_sync(&fimc->pdev->dev); - if (ret < 0) { - pm_runtime_put_sync(&fimc->pdev->dev); + ret = pm_runtime_resume_and_get(&fimc->pdev->dev); + if (ret < 0) goto unlock; - }
ret = v4l2_fh_open(file); if (ret) { diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index 972d9601d236..1b24f5bfc4af 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -828,9 +828,9 @@ static int fimc_is_probe(struct platform_device *pdev) goto err_irq; }
- ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) - goto err_pm; + goto err_irq;
vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32));
diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c index 612b9872afc8..8d9dc597deaa 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.c +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c @@ -275,7 +275,7 @@ static int isp_video_open(struct file *file) if (ret < 0) goto unlock;
- ret = pm_runtime_get_sync(&isp->pdev->dev); + ret = pm_runtime_resume_and_get(&isp->pdev->dev); if (ret < 0) goto rel_fh;
@@ -293,7 +293,6 @@ static int isp_video_open(struct file *file) if (!ret) goto unlock; rel_fh: - pm_runtime_put_noidle(&isp->pdev->dev); v4l2_fh_release(file); unlock: mutex_unlock(&isp->video_lock); diff --git a/drivers/media/platform/exynos4-is/fimc-isp.c b/drivers/media/platform/exynos4-is/fimc-isp.c index a77c49b18511..74b49d30901e 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp.c +++ b/drivers/media/platform/exynos4-is/fimc-isp.c @@ -304,11 +304,10 @@ static int fimc_isp_subdev_s_power(struct v4l2_subdev *sd, int on) pr_debug("on: %d\n", on);
if (on) { - ret = pm_runtime_get_sync(&is->pdev->dev); - if (ret < 0) { - pm_runtime_put(&is->pdev->dev); + ret = pm_runtime_resume_and_get(&is->pdev->dev); + if (ret < 0) return ret; - } + set_bit(IS_ST_PWR_ON, &is->state);
ret = fimc_is_start_firmware(is); diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c index fe20af3a7178..4d8b18078ff3 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.c +++ b/drivers/media/platform/exynos4-is/fimc-lite.c @@ -469,9 +469,9 @@ static int fimc_lite_open(struct file *file) }
set_bit(ST_FLITE_IN_USE, &fimc->state); - ret = pm_runtime_get_sync(&fimc->pdev->dev); + ret = pm_runtime_resume_and_get(&fimc->pdev->dev); if (ret < 0) - goto err_pm; + goto err_in_use;
ret = v4l2_fh_open(file); if (ret < 0) @@ -499,6 +499,7 @@ static int fimc_lite_open(struct file *file) v4l2_fh_release(file); err_pm: pm_runtime_put_sync(&fimc->pdev->dev); +err_in_use: clear_bit(ST_FLITE_IN_USE, &fimc->state); unlock: mutex_unlock(&fimc->lock); diff --git a/drivers/media/platform/exynos4-is/fimc-m2m.c b/drivers/media/platform/exynos4-is/fimc-m2m.c index c9704a147e5c..df8e2aa454d8 100644 --- a/drivers/media/platform/exynos4-is/fimc-m2m.c +++ b/drivers/media/platform/exynos4-is/fimc-m2m.c @@ -73,17 +73,14 @@ static void fimc_m2m_shutdown(struct fimc_ctx *ctx) static int start_streaming(struct vb2_queue *q, unsigned int count) { struct fimc_ctx *ctx = q->drv_priv; - int ret;
- ret = pm_runtime_get_sync(&ctx->fimc_dev->pdev->dev); - return ret > 0 ? 0 : ret; + return pm_runtime_resume_and_get(&ctx->fimc_dev->pdev->dev); }
static void stop_streaming(struct vb2_queue *q) { struct fimc_ctx *ctx = q->drv_priv;
- fimc_m2m_shutdown(ctx); fimc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); pm_runtime_put(&ctx->fimc_dev->pdev->dev); diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c index 13d192ba4aa6..e025178db06c 100644 --- a/drivers/media/platform/exynos4-is/media-dev.c +++ b/drivers/media/platform/exynos4-is/media-dev.c @@ -512,11 +512,9 @@ static int fimc_md_register_sensor_entities(struct fimc_md *fmd) if (!fmd->pmf) return -ENXIO;
- ret = pm_runtime_get_sync(fmd->pmf); - if (ret < 0) { - pm_runtime_put(fmd->pmf); + ret = pm_runtime_resume_and_get(fmd->pmf); + if (ret < 0) return ret; - }
fmd->num_sensors = 0;
@@ -1291,8 +1289,7 @@ static int cam_clk_prepare(struct clk_hw *hw) if (camclk->fmd->pmf == NULL) return -ENODEV;
- ret = pm_runtime_get_sync(camclk->fmd->pmf); - return ret < 0 ? ret : 0; + return pm_runtime_resume_and_get(camclk->fmd->pmf); }
static void cam_clk_unprepare(struct clk_hw *hw) diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c index 1aac167abb17..ebf39c856894 100644 --- a/drivers/media/platform/exynos4-is/mipi-csis.c +++ b/drivers/media/platform/exynos4-is/mipi-csis.c @@ -494,7 +494,7 @@ static int s5pcsis_s_power(struct v4l2_subdev *sd, int on) struct device *dev = &state->pdev->dev;
if (on) - return pm_runtime_get_sync(dev); + return pm_runtime_resume_and_get(dev);
return pm_runtime_put_sync(dev); } @@ -509,11 +509,9 @@ static int s5pcsis_s_stream(struct v4l2_subdev *sd, int enable)
if (enable) { s5pcsis_clear_counters(state); - ret = pm_runtime_get_sync(&state->pdev->dev); - if (ret && ret != 1) { - pm_runtime_put_noidle(&state->pdev->dev); + ret = pm_runtime_resume_and_get(&state->pdev->dev); + if (ret < 0) return ret; - } }
mutex_lock(&state->lock); @@ -535,7 +533,7 @@ static int s5pcsis_s_stream(struct v4l2_subdev *sd, int enable) if (!enable) pm_runtime_put(&state->pdev->dev);
- return ret == 1 ? 0 : ret; + return ret; }
static int s5pcsis_enum_mbus_code(struct v4l2_subdev *sd,
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 59087b66ea6730c130c57d23bd9fd139b78c1ba5 ]
The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak.
As a bonus, as pm_runtime_get_sync() always return 0 on success, the logic can be simplified.
Reviewed-by: Jonathan Cameron Jonathan.Cameron@huawei.com Reviewed-by: Sylwester Nawrocki s.nawrocki@samsung.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/exynos-gsc/gsc-m2m.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/media/platform/exynos-gsc/gsc-m2m.c b/drivers/media/platform/exynos-gsc/gsc-m2m.c index 27a3c92c73bc..f1cf847d1cc2 100644 --- a/drivers/media/platform/exynos-gsc/gsc-m2m.c +++ b/drivers/media/platform/exynos-gsc/gsc-m2m.c @@ -56,10 +56,8 @@ static void __gsc_m2m_job_abort(struct gsc_ctx *ctx) static int gsc_m2m_start_streaming(struct vb2_queue *q, unsigned int count) { struct gsc_ctx *ctx = q->drv_priv; - int ret;
- ret = pm_runtime_get_sync(&ctx->gsc_dev->pdev->dev); - return ret > 0 ? 0 : ret; + return pm_runtime_resume_and_get(&ctx->gsc_dev->pdev->dev); }
static void __gsc_m2m_cleanup_queue(struct gsc_ctx *ctx)
From: Jay Fang f.fangjian@huawei.com
[ Upstream commit 9e37a3ab0627011fb63875e9a93094b6fc8ddf48 ]
In function 'spi_test_run_iter': Value 'tx_buf' might be 'rx_buf'.
Signed-off-by: Jay Fang f.fangjian@huawei.com Link: https://lore.kernel.org/r/1620629903-15493-5-git-send-email-f.fangjian@huawe... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi-loopback-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c index f1cf2232f0b5..4d4f77a186a9 100644 --- a/drivers/spi/spi-loopback-test.c +++ b/drivers/spi/spi-loopback-test.c @@ -875,7 +875,7 @@ static int spi_test_run_iter(struct spi_device *spi, test.transfers[i].len = len; if (test.transfers[i].tx_buf) test.transfers[i].tx_buf += tx_off; - if (test.transfers[i].tx_buf) + if (test.transfers[i].rx_buf) test.transfers[i].rx_buf += rx_off; }
From: Jay Fang f.fangjian@huawei.com
[ Upstream commit 026a1dc1af52742c5897e64a3431445371a71871 ]
pch_spi_set_tx() frees data->pkt_tx_buff on failure of kzalloc() for data->pkt_rx_buff, but its caller, pch_spi_process_messages(), will free data->pkt_tx_buff again. Set data->pkt_tx_buff to NULL after kfree() to avoid double free.
Signed-off-by: Jay Fang f.fangjian@huawei.com Link: https://lore.kernel.org/r/1620284888-65215-1-git-send-email-f.fangjian@huawe... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi-topcliff-pch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index b8870784fc6e..8c4615b76339 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -580,8 +580,10 @@ static void pch_spi_set_tx(struct pch_spi_data *data, int *bpw) data->pkt_tx_buff = kzalloc(size, GFP_KERNEL); if (data->pkt_tx_buff != NULL) { data->pkt_rx_buff = kzalloc(size, GFP_KERNEL); - if (!data->pkt_rx_buff) + if (!data->pkt_rx_buff) { kfree(data->pkt_tx_buff); + data->pkt_tx_buff = NULL; + } }
if (!data->pkt_rx_buff) {
From: Tian Tao tiantao6@hisilicon.com
[ Upstream commit e7a1a3abea373e41ba7dfe0fbc93cb79b6a3a529 ]
word_len should be checked in the omap1_spi100k_setup_transfer function to see if it exceeds 32.
Signed-off-by: Tian Tao tiantao6@hisilicon.com Link: https://lore.kernel.org/r/1619695248-39045-1-git-send-email-tiantao6@hisilic... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi-omap-100k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c index 7062f2902253..f104470605b3 100644 --- a/drivers/spi/spi-omap-100k.c +++ b/drivers/spi/spi-omap-100k.c @@ -241,7 +241,7 @@ static int omap1_spi100k_setup_transfer(struct spi_device *spi, else word_len = spi->bits_per_word;
- if (spi->bits_per_word > 32) + if (word_len > 32) return -EINVAL; cs->word_len = word_len;
From: Zou Wei zou_wei@huawei.com
[ Upstream commit d019f38a1af3c6015cde6a47951a3ec43beeed80 ]
This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Zou Wei zou_wei@huawei.com Link: https://lore.kernel.org/r/1620705198-104566-1-git-send-email-zou_wei@huawei.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/regulator/uniphier-regulator.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/regulator/uniphier-regulator.c b/drivers/regulator/uniphier-regulator.c index 2e02e26b516c..e75b0973e325 100644 --- a/drivers/regulator/uniphier-regulator.c +++ b/drivers/regulator/uniphier-regulator.c @@ -201,6 +201,7 @@ static const struct of_device_id uniphier_regulator_match[] = { }, { /* Sentinel */ }, }; +MODULE_DEVICE_TABLE(of, uniphier_regulator_match);
static struct platform_driver uniphier_regulator_driver = { .probe = uniphier_regulator_probe,
From: Valentin Schneider valentin.schneider@arm.com
[ Upstream commit f1a0a376ca0c4ef1fc3d24e3e502acbb5b795674 ]
As pointed out by commit
de9b8f5dcbd9 ("sched: Fix crash trying to dequeue/enqueue the idle thread")
init_idle() can and will be invoked more than once on the same idle task. At boot time, it is invoked for the boot CPU thread by sched_init(). Then smp_init() creates the threads for all the secondary CPUs and invokes init_idle() on them.
As the hotplug machinery brings the secondaries to life, it will issue calls to idle_thread_get(), which itself invokes init_idle() yet again. In this case it's invoked twice more per secondary: at _cpu_up(), and at bringup_cpu().
Given smp_init() already initializes the idle tasks for all *possible* CPUs, no further initialization should be required. Now, removing init_idle() from idle_thread_get() exposes some interesting expectations with regards to the idle task's preempt_count: the secondary startup always issues a preempt_disable(), requiring some reset of the preempt count to 0 between hot-unplug and hotplug, which is currently served by idle_thread_get() -> idle_init().
Given the idle task is supposed to have preemption disabled once and never see it re-enabled, it seems that what we actually want is to initialize its preempt_count to PREEMPT_DISABLED and leave it there. Do that, and remove init_idle() from idle_thread_get().
Secondary startups were patched via coccinelle:
@begone@ @@
-preempt_disable(); ... cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
Signed-off-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Ingo Molnar mingo@kernel.org Acked-by: Peter Zijlstra peterz@infradead.org Link: https://lore.kernel.org/r/20210512094636.2958515-1-valentin.schneider@arm.co... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/alpha/kernel/smp.c | 1 - arch/arc/kernel/smp.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm64/include/asm/preempt.h | 2 +- arch/arm64/kernel/smp.c | 1 - arch/csky/kernel/smp.c | 1 - arch/ia64/kernel/smpboot.c | 1 - arch/mips/kernel/smp.c | 1 - arch/openrisc/kernel/smp.c | 2 -- arch/parisc/kernel/smp.c | 1 - arch/powerpc/kernel/smp.c | 1 - arch/riscv/kernel/smpboot.c | 1 - arch/s390/include/asm/preempt.h | 4 ++-- arch/s390/kernel/smp.c | 1 - arch/sh/kernel/smp.c | 2 -- arch/sparc/kernel/smp_32.c | 1 - arch/sparc/kernel/smp_64.c | 3 --- arch/x86/include/asm/preempt.h | 2 +- arch/x86/kernel/smpboot.c | 1 - arch/xtensa/kernel/smp.c | 1 - include/asm-generic/preempt.h | 2 +- init/main.c | 6 +----- kernel/fork.c | 2 +- kernel/sched/core.c | 2 +- kernel/smpboot.c | 1 - 25 files changed, 8 insertions(+), 34 deletions(-)
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index f4dd9f3f3001..4b2575f936d4 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,7 +166,6 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm));
- preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); }
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 52906d314537..db0e104d6835 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -189,7 +189,6 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); }
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 74679240a9d8..c7bb168b0d97 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -432,7 +432,6 @@ asmlinkage void secondary_start_kernel(void) #endif pr_debug("CPU%u: Booted secondary processor\n", cpu);
- preempt_disable(); trace_hardirqs_off();
/* diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index 80e946b2abee..e83f0982b99c 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -23,7 +23,7 @@ static inline void preempt_count_set(u64 pc) } while (0)
#define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0)
static inline void set_preempt_need_resched(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dcd7041b2b07..6671000a8b7d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -224,7 +224,6 @@ asmlinkage notrace void secondary_start_kernel(void) init_gic_priority_masking();
rcu_cpu_starting(cpu); - preempt_disable(); trace_hardirqs_off();
/* diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index 0f9f5eef9338..e2993539af8e 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -281,7 +281,6 @@ void csky_start_secondary(void) pr_info("CPU%u Online: %s...\n", cpu, __func__);
local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); }
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 49b488580939..d10f780c13b9 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -441,7 +441,6 @@ start_secondary (void *unused) #endif efi_map_pal_code(); cpu_init(); - preempt_disable(); smp_callin();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index ef86fbad8546..d542fb7af3ba 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -348,7 +348,6 @@ asmlinkage void start_secondary(void) */
calibrate_delay(); - preempt_disable(); cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy;
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index 48e1092a64de..415e209732a3 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -145,8 +145,6 @@ asmlinkage __init void secondary_start_kernel(void) set_cpu_online(cpu, true);
local_irq_enable(); - - preempt_disable(); /* * OK, it's off to the idle thread for us */ diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 10227f667c8a..1405b603b91b 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -302,7 +302,6 @@ void __init smp_callin(unsigned long pdce_proc) #endif
smp_cpu_init(slave_id); - preempt_disable();
flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2e05c783440a..6c6e4d934d86 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1547,7 +1547,6 @@ void start_secondary(void *unused) smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); rcu_cpu_starting(cpu); - preempt_disable(); cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 9a408e2942ac..bd82375db51a 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -180,7 +180,6 @@ asmlinkage __visible void smp_callin(void) * Disable preemption before enabling interrupts, so we don't try to * schedule a CPU that hasn't actually started yet. */ - preempt_disable(); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b49e0492842c..23ff51be7e29 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -32,7 +32,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0)
static inline void set_preempt_need_resched(void) @@ -91,7 +91,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0)
static inline void set_preempt_need_resched(void) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2fec2b80d35d..111909aeb8d2 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -878,7 +878,6 @@ static void smp_init_secondary(void) restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); rcu_cpu_starting(cpu); - preempt_disable(); init_cpu_timer(); vtime_init(); vdso_getcpu_init(); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 372acdc9033e..65924d9ec245 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -186,8 +186,6 @@ asmlinkage void start_secondary(void)
per_cpu_trap_init();
- preempt_disable(); - notify_cpu_starting(cpu);
local_irq_enable(); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 50c127ab46d5..22b148e5a5f8 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -348,7 +348,6 @@ static void sparc_start_secondary(void *arg) */ arch_cpu_pre_starting(arg);
- preempt_disable(); cpu = smp_processor_id();
notify_cpu_starting(cpu); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e38d8bf454e8..ae5faa1d989d 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -138,9 +138,6 @@ void smp_callin(void)
set_cpu_online(cpuid, true);
- /* idle thread is expected to have preempt disabled */ - preempt_disable(); - local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index f8cb8af4de5c..fe5efbcba824 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0)
/* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7770245cc7fa..ec2d64aa2163 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -236,7 +236,6 @@ static void notrace start_secondary(void *unused) cpu_init(); rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); - preempt_disable(); smp_callin();
enable_start_cpu0 = 0; diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index cd85a7a2722b..1254da07ead1 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -145,7 +145,6 @@ void secondary_start_kernel(void) cpumask_set_cpu(cpu, mm_cpumask(mm)); enter_lazy_tlb(mm, current);
- preempt_disable(); trace_hardirqs_off();
calibrate_delay(); diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d683f5e6d791..b4d43a4af5f7 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -29,7 +29,7 @@ static __always_inline void preempt_count_set(int pc) } while (0)
#define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0)
static __always_inline void set_preempt_need_resched(void) diff --git a/init/main.c b/init/main.c index e9c42a183e33..e6836a9400d5 100644 --- a/init/main.c +++ b/init/main.c @@ -941,11 +941,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) * time - but meanwhile we still have a functioning scheduler. */ sched_init(); - /* - * Disable preemption - early bootup scheduling is extremely - * fragile until we cpu_idle() for the first time. - */ - preempt_disable(); + if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); diff --git a/kernel/fork.c b/kernel/fork.c index 24a689df61c9..98b4bc7f0d7b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2407,7 +2407,7 @@ static inline void init_idle_pids(struct task_struct *idle) } }
-struct task_struct *fork_idle(int cpu) +struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4ca80df205ce..0a90d4d7663b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7433,7 +7433,7 @@ void show_state_filter(unsigned long state_filter) * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void init_idle(struct task_struct *idle, int cpu) +void __init init_idle(struct task_struct *idle, int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long flags; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index f25208e8df83..e4163042c4d6 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -33,7 +33,6 @@ struct task_struct *idle_thread_get(unsigned int cpu)
if (!tsk) return ERR_PTR(-ENOMEM); - init_idle(tsk, cpu); return tsk; }
From: Łukasz Stelmach l.stelmach@samsung.com
[ Upstream commit 0cdbabf8bb7a6147f5adf37dbc251e92a1bbc2c7 ]
pm_runtime_resume_and_get() wraps around pm_runtime_get_sync() and decrements the runtime PM usage counter in case the latter function fails and keeps the counter balanced.
Signed-off-by: Łukasz Stelmach l.stelmach@samsung.com Reviewed-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/char/hw_random/exynos-trng.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c index 8e1fe3f8dd2d..c8db62bc5ff7 100644 --- a/drivers/char/hw_random/exynos-trng.c +++ b/drivers/char/hw_random/exynos-trng.c @@ -132,7 +132,7 @@ static int exynos_trng_probe(struct platform_device *pdev) return PTR_ERR(trng->mem);
pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "Could not get runtime PM.\n"); goto err_pm_get; @@ -165,7 +165,7 @@ static int exynos_trng_probe(struct platform_device *pdev) clk_disable_unprepare(trng->clk);
err_clock: - pm_runtime_put_sync(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev);
err_pm_get: pm_runtime_disable(&pdev->dev);
From: Bixuan Cui cuibixuan@huawei.com
[ Upstream commit 06676aa1f455c74e3ad1624cea3acb9ed2ef71ae ]
This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Bixuan Cui cuibixuan@huawei.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/nx/nx-842-pseries.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c index cc8dd3072b8b..8ee547ee378e 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -1069,6 +1069,7 @@ static const struct vio_device_id nx842_vio_driver_ids[] = { {"ibm,compression-v1", "ibm,compression"}, {"", ""}, }; +MODULE_DEVICE_TABLE(vio, nx842_vio_driver_ids);
static struct vio_driver nx842_vio_driver = { .name = KBUILD_MODNAME,
From: Lucas Tanure tanureal@opensource.cirrus.com
[ Upstream commit ea030ca688193462b8d612c1628c37129aa30072 ]
Set regmap raw read/write from i2c quirks max read/write so regmap_raw_read/write can split the access into chunks
Signed-off-by: Lucas Tanure tanureal@opensource.cirrus.com Link: https://lore.kernel.org/r/20210512135222.223203-1-tanureal@opensource.cirrus... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/regmap/regmap-i2c.c | 45 +++++++++++++++++++++++++++----- drivers/base/regmap/regmap.c | 2 ++ include/linux/regmap.h | 2 ++ 3 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index 62b95a9212ae..980e5ce6a3a3 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -306,33 +306,64 @@ static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = { static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c, const struct regmap_config *config) { + const struct i2c_adapter_quirks *quirks; + const struct regmap_bus *bus = NULL; + struct regmap_bus *ret_bus; + u16 max_read = 0, max_write = 0; + if (i2c_check_functionality(i2c->adapter, I2C_FUNC_I2C)) - return ®map_i2c; + bus = ®map_i2c; else if (config->val_bits == 8 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) - return ®map_i2c_smbus_i2c_block; + bus = ®map_i2c_smbus_i2c_block; else if (config->val_bits == 8 && config->reg_bits == 16 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) - return ®map_i2c_smbus_i2c_block_reg16; + bus = ®map_i2c_smbus_i2c_block_reg16; else if (config->val_bits == 16 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_WORD_DATA)) switch (regmap_get_val_endian(&i2c->dev, NULL, config)) { case REGMAP_ENDIAN_LITTLE: - return ®map_smbus_word; + bus = ®map_smbus_word; + break; case REGMAP_ENDIAN_BIG: - return ®map_smbus_word_swapped; + bus = ®map_smbus_word_swapped; + break; default: /* everything else is not supported */ break; } else if (config->val_bits == 8 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - return ®map_smbus_byte; + bus = ®map_smbus_byte; + + if (!bus) + return ERR_PTR(-ENOTSUPP); + + quirks = i2c->adapter->quirks; + if (quirks) { + if (quirks->max_read_len && + (bus->max_raw_read == 0 || bus->max_raw_read > quirks->max_read_len)) + max_read = quirks->max_read_len; + + if (quirks->max_write_len && + (bus->max_raw_write == 0 || bus->max_raw_write > quirks->max_write_len)) + max_write = quirks->max_write_len; + + if (max_read || max_write) { + ret_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); + if (!ret_bus) + return ERR_PTR(-ENOMEM); + ret_bus->free_on_exit = true; + ret_bus->max_raw_read = max_read; + ret_bus->max_raw_write = max_write; + bus = ret_bus; + } + }
- return ERR_PTR(-ENOTSUPP); + return bus; }
struct regmap *__regmap_init_i2c(struct i2c_client *i2c, diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 297e95be25b3..0d185ec018a5 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1496,6 +1496,8 @@ void regmap_exit(struct regmap *map) mutex_destroy(&map->mutex); kfree_const(map->name); kfree(map->patch); + if (map->bus && map->bus->free_on_exit) + kfree(map->bus); kfree(map); } EXPORT_SYMBOL_GPL(regmap_exit); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f87a11a5cc4a..8c16e6fa0f66 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -502,6 +502,7 @@ typedef void (*regmap_hw_free_context)(void *context); * DEFAULT, BIG is assumed. * @max_raw_read: Max raw read size that can be used on the bus. * @max_raw_write: Max raw write size that can be used on the bus. + * @free_on_exit: kfree this on exit of regmap */ struct regmap_bus { bool fast_io; @@ -519,6 +520,7 @@ struct regmap_bus { enum regmap_endian val_format_endian_default; size_t max_raw_read; size_t max_raw_write; + bool free_on_exit; };
/*
On Sun, Jul 04, 2021 at 07:03:18PM -0400, Sasha Levin wrote:
From: Lucas Tanure tanureal@opensource.cirrus.com
[ Upstream commit ea030ca688193462b8d612c1628c37129aa30072 ]
Set regmap raw read/write from i2c quirks max read/write so regmap_raw_read/write can split the access into chunks
This seems a bit new featureish to be backporting to stable.
On Mon, Jul 05, 2021 at 01:09:20PM +0100, Mark Brown wrote:
On Sun, Jul 04, 2021 at 07:03:18PM -0400, Sasha Levin wrote:
From: Lucas Tanure tanureal@opensource.cirrus.com
[ Upstream commit ea030ca688193462b8d612c1628c37129aa30072 ]
Set regmap raw read/write from i2c quirks max read/write so regmap_raw_read/write can split the access into chunks
This seems a bit new featureish to be backporting to stable.
That's fair, I'll drop it.
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 56c1f0876293888f686e31278d183d4af2cac3c3 ]
The right thing to do is to add a new object to the building system when a certain config option is selected, and *not* override them.
So, fix obj-$(config) logic at sti makefiles, using "+=", instead of ":=".
Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/sti/bdisp/Makefile | 2 +- drivers/media/platform/sti/delta/Makefile | 2 +- drivers/media/platform/sti/hva/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/media/platform/sti/bdisp/Makefile b/drivers/media/platform/sti/bdisp/Makefile index caf7ccd193ea..39ade0a34723 100644 --- a/drivers/media/platform/sti/bdisp/Makefile +++ b/drivers/media/platform/sti/bdisp/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_VIDEO_STI_BDISP) := bdisp.o +obj-$(CONFIG_VIDEO_STI_BDISP) += bdisp.o
bdisp-objs := bdisp-v4l2.o bdisp-hw.o bdisp-debug.o diff --git a/drivers/media/platform/sti/delta/Makefile b/drivers/media/platform/sti/delta/Makefile index 92b37e216f00..32412fa4c632 100644 --- a/drivers/media/platform/sti/delta/Makefile +++ b/drivers/media/platform/sti/delta/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_VIDEO_STI_DELTA_DRIVER) := st-delta.o +obj-$(CONFIG_VIDEO_STI_DELTA_DRIVER) += st-delta.o st-delta-y := delta-v4l2.o delta-mem.o delta-ipc.o delta-debug.o
# MJPEG support diff --git a/drivers/media/platform/sti/hva/Makefile b/drivers/media/platform/sti/hva/Makefile index 74b41ec52f97..b5a5478bdd01 100644 --- a/drivers/media/platform/sti/hva/Makefile +++ b/drivers/media/platform/sti/hva/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_VIDEO_STI_HVA) := st-hva.o +obj-$(CONFIG_VIDEO_STI_HVA) += st-hva.o st-hva-y := hva-v4l2.o hva-hw.o hva-mem.o hva-h264.o st-hva-$(CONFIG_VIDEO_STI_HVA_DEBUGFS) += hva-debugfs.o
From: Valentin Schneider valentin.schneider@arm.com
[ Upstream commit 00b89fe0197f0c55a045775c11553c0cdb7082fe ]
For all intents and purposes, the idle task is a per-CPU kthread. It isn't created via the same route as other pcpu kthreads however, and as a result it is missing a few bells and whistles: it fails kthread_is_per_cpu() and it doesn't have PF_NO_SETAFFINITY set.
Fix the former by giving the idle task a kthread struct along with the KTHREAD_IS_PER_CPU flag. This requires some extra iffery as init_idle() call be called more than once on the same idle task.
Signed-off-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20210510151024.2448573-2-valentin.schneider@arm.co... Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/kthread.h | 2 ++ kernel/kthread.c | 30 ++++++++++++++++++------------ kernel/sched/core.c | 21 +++++++++++++++------ 3 files changed, 35 insertions(+), 18 deletions(-)
diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2484ed97e72f..d9133d6db308 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,6 +33,8 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt);
+void set_kthread_struct(struct task_struct *p); + void kthread_set_per_cpu(struct task_struct *k, int cpu); bool kthread_is_per_cpu(struct task_struct *k);
diff --git a/kernel/kthread.c b/kernel/kthread.c index 0fccf7d0c6a1..6e02094849d3 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -68,16 +68,6 @@ enum KTHREAD_BITS { KTHREAD_SHOULD_PARK, };
-static inline void set_kthread_struct(void *kthread) -{ - /* - * We abuse ->set_child_tid to avoid the new member and because it - * can't be wrongly copied by copy_process(). We also rely on fact - * that the caller can't exec, so PF_KTHREAD can't be cleared. - */ - current->set_child_tid = (__force void __user *)kthread; -} - static inline struct kthread *to_kthread(struct task_struct *k) { WARN_ON(!(k->flags & PF_KTHREAD)); @@ -103,6 +93,22 @@ static inline struct kthread *__to_kthread(struct task_struct *p) return kthread; }
+void set_kthread_struct(struct task_struct *p) +{ + struct kthread *kthread; + + if (__to_kthread(p)) + return; + + kthread = kzalloc(sizeof(*kthread), GFP_KERNEL); + /* + * We abuse ->set_child_tid to avoid the new member and because it + * can't be wrongly copied by copy_process(). We also rely on fact + * that the caller can't exec, so PF_KTHREAD can't be cleared. + */ + p->set_child_tid = (__force void __user *)kthread; +} + void free_kthread_struct(struct task_struct *k) { struct kthread *kthread; @@ -272,8 +278,8 @@ static int kthread(void *_create) struct kthread *self; int ret;
- self = kzalloc(sizeof(*self), GFP_KERNEL); - set_kthread_struct(self); + set_kthread_struct(current); + self = to_kthread(current);
/* If user was SIGKILLed, I release the structure. */ done = xchg(&create->done, NULL); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0a90d4d7663b..9724dd30ad44 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7440,12 +7440,25 @@ void __init init_idle(struct task_struct *idle, int cpu)
__sched_fork(0, idle);
+ /* + * The idle task doesn't need the kthread struct to function, but it + * is dressed up as a per-CPU kthread and thus needs to play the part + * if we want to avoid special-casing it in code that deals with per-CPU + * kthreads. + */ + set_kthread_struct(idle); + raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_lock(&rq->lock);
idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); - idle->flags |= PF_IDLE; + /* + * PF_KTHREAD should already be set at this point; regardless, make it + * look like a proper per-CPU kthread. + */ + idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; + kthread_set_per_cpu(idle, cpu);
scs_task_reset(idle); kasan_unpoison_task_stack(idle); @@ -7662,12 +7675,8 @@ static void balance_push(struct rq *rq) /* * Both the cpu-hotplug and stop task are in this case and are * required to complete the hotplug process. - * - * XXX: the idle task does not match kthread_is_per_cpu() due to - * histerical raisins. */ - if (rq->idle == push_task || - kthread_is_per_cpu(push_task) || + if (kthread_is_per_cpu(push_task) || is_migration_disabled(push_task)) {
/*
From: Pavel Skripkin paskripkin@gmail.com
[ Upstream commit be8656e62e9e791837b606a027802b504a945c97 ]
syzbot reported leak in cpia2 usb driver. The problem was in invalid error handling.
v4l2_device_register() is called in cpia2_init_camera_struct(), but all error cases after cpia2_init_camera_struct() did not call the v4l2_device_unregister()
Reported-by: syzbot+d1e69c888f0d3866ead4@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin paskripkin@gmail.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/usb/cpia2/cpia2.h | 1 + drivers/media/usb/cpia2/cpia2_core.c | 12 ++++++++++++ drivers/media/usb/cpia2/cpia2_usb.c | 13 +++++++------ 3 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/drivers/media/usb/cpia2/cpia2.h b/drivers/media/usb/cpia2/cpia2.h index 50835f5f7512..57b7f1ea68da 100644 --- a/drivers/media/usb/cpia2/cpia2.h +++ b/drivers/media/usb/cpia2/cpia2.h @@ -429,6 +429,7 @@ int cpia2_send_command(struct camera_data *cam, struct cpia2_command *cmd); int cpia2_do_command(struct camera_data *cam, unsigned int command, unsigned char direction, unsigned char param); +void cpia2_deinit_camera_struct(struct camera_data *cam, struct usb_interface *intf); struct camera_data *cpia2_init_camera_struct(struct usb_interface *intf); int cpia2_init_camera(struct camera_data *cam); int cpia2_allocate_buffers(struct camera_data *cam); diff --git a/drivers/media/usb/cpia2/cpia2_core.c b/drivers/media/usb/cpia2/cpia2_core.c index e747548ab286..b5a2d06fb356 100644 --- a/drivers/media/usb/cpia2/cpia2_core.c +++ b/drivers/media/usb/cpia2/cpia2_core.c @@ -2163,6 +2163,18 @@ static void reset_camera_struct(struct camera_data *cam) cam->height = cam->params.roi.height; }
+/****************************************************************************** + * + * cpia2_init_camera_struct + * + * Deinitialize camera struct + *****************************************************************************/ +void cpia2_deinit_camera_struct(struct camera_data *cam, struct usb_interface *intf) +{ + v4l2_device_unregister(&cam->v4l2_dev); + kfree(cam); +} + /****************************************************************************** * * cpia2_init_camera_struct diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c index 3ab80a7b4498..76aac06f9fb8 100644 --- a/drivers/media/usb/cpia2/cpia2_usb.c +++ b/drivers/media/usb/cpia2/cpia2_usb.c @@ -844,15 +844,13 @@ static int cpia2_usb_probe(struct usb_interface *intf, ret = set_alternate(cam, USBIF_CMDONLY); if (ret < 0) { ERR("%s: usb_set_interface error (ret = %d)\n", __func__, ret); - kfree(cam); - return ret; + goto alt_err; }
if((ret = cpia2_init_camera(cam)) < 0) { ERR("%s: failed to initialize cpia2 camera (ret = %d)\n", __func__, ret); - kfree(cam); - return ret; + goto alt_err; } LOG(" CPiA Version: %d.%02d (%d.%d)\n", cam->params.version.firmware_revision_hi, @@ -872,11 +870,14 @@ static int cpia2_usb_probe(struct usb_interface *intf, ret = cpia2_register_camera(cam); if (ret < 0) { ERR("%s: Failed to register cpia2 camera (ret = %d)\n", __func__, ret); - kfree(cam); - return ret; + goto alt_err; }
return 0; + +alt_err: + cpia2_deinit_camera_struct(cam, intf); + return ret; }
/******************************************************************************
From: Hans Verkuil hverkuil-cisco@xs4all.nl
[ Upstream commit 3d37ef41bed0854805ab9af22c422267510e1344 ]
The cobalt_s_bit_sysctrl reads the old register value over PCI, then changes a bit and sets writes the new value to the register.
This is used among other things for setting the HPD output pin.
But if the HPD is changed for multiple inputs at the same time, then this causes a race condition where a stale value is read.
Serialize this function with a mutex.
Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/pci/cobalt/cobalt-driver.c | 1 + drivers/media/pci/cobalt/cobalt-driver.h | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/media/pci/cobalt/cobalt-driver.c b/drivers/media/pci/cobalt/cobalt-driver.c index 839503e654f4..16af58f2f93c 100644 --- a/drivers/media/pci/cobalt/cobalt-driver.c +++ b/drivers/media/pci/cobalt/cobalt-driver.c @@ -667,6 +667,7 @@ static int cobalt_probe(struct pci_dev *pci_dev, return -ENOMEM; cobalt->pci_dev = pci_dev; cobalt->instance = i; + mutex_init(&cobalt->pci_lock);
retval = v4l2_device_register(&pci_dev->dev, &cobalt->v4l2_dev); if (retval) { diff --git a/drivers/media/pci/cobalt/cobalt-driver.h b/drivers/media/pci/cobalt/cobalt-driver.h index bca68572b324..12c33e035904 100644 --- a/drivers/media/pci/cobalt/cobalt-driver.h +++ b/drivers/media/pci/cobalt/cobalt-driver.h @@ -251,6 +251,8 @@ struct cobalt { int instance; struct pci_dev *pci_dev; struct v4l2_device v4l2_dev; + /* serialize PCI access in cobalt_s_bit_sysctrl() */ + struct mutex pci_lock;
void __iomem *bar0, *bar1;
@@ -320,10 +322,13 @@ static inline u32 cobalt_g_sysctrl(struct cobalt *cobalt) static inline void cobalt_s_bit_sysctrl(struct cobalt *cobalt, int bit, int val) { - u32 ctrl = cobalt_read_bar1(cobalt, COBALT_SYS_CTRL_BASE); + u32 ctrl;
+ mutex_lock(&cobalt->pci_lock); + ctrl = cobalt_read_bar1(cobalt, COBALT_SYS_CTRL_BASE); cobalt_write_bar1(cobalt, COBALT_SYS_CTRL_BASE, (ctrl & ~(1UL << bit)) | (val << bit)); + mutex_unlock(&cobalt->pci_lock); }
static inline u32 cobalt_g_sysstat(struct cobalt *cobalt)
From: Jernej Skrabec jernej.skrabec@siol.net
[ Upstream commit 67a7e53d5b21f3a84efc03a4e62db7caf97841ef ]
Dependent slice segment flag for PPS control is misnamed. It should have "enabled" at the end. It only tells if this flag is present in slice header or not and not the actual value.
Fix this by renaming the PPS flag and introduce another flag for slice control which tells actual value.
Signed-off-by: Jernej Skrabec jernej.skrabec@siol.net Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst | 5 ++++- drivers/staging/media/sunxi/cedrus/cedrus_h265.c | 4 ++-- include/media/hevc-ctrls.h | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst index b0de4e6e7ebd..514b334470ea 100644 --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst @@ -3053,7 +3053,7 @@ enum v4l2_mpeg_video_hevc_size_of_length_field - :stub-columns: 0 :widths: 1 1 2
- * - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT`` + * - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED`` - 0x00000001 - * - ``V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT`` @@ -3277,6 +3277,9 @@ enum v4l2_mpeg_video_hevc_size_of_length_field - * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED`` - 0x00000100 - + * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT`` + - 0x00000200 + -
.. raw:: latex
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index ce497d0197df..10744fab7cea 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -477,8 +477,8 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx, slice_params->flags);
reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_DEPENDENT_SLICE_SEGMENT, - V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT, - pps->flags); + V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT, + slice_params->flags);
/* FIXME: For multi-slice support. */ reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_FIRST_SLICE_SEGMENT_IN_PIC; diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h index b4cb2ef02f17..226fcfa0e026 100644 --- a/include/media/hevc-ctrls.h +++ b/include/media/hevc-ctrls.h @@ -81,7 +81,7 @@ struct v4l2_ctrl_hevc_sps { __u64 flags; };
-#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 0) +#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) #define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) #define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) #define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) @@ -160,6 +160,7 @@ struct v4l2_hevc_pred_weight_table { #define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) #define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) #define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9)
struct v4l2_ctrl_hevc_slice_params { __u32 bit_size;
From: Anirudh Rayabharam mail@anirudhrb.com
[ Upstream commit f8194e5e63fdcb349e8da9eef9e574d5b1d687cb ]
syzbot has reported the following warning in pvr2_i2c_done:
sysfs group 'power' not found for kobject '1-0043'
When the device is disconnected (pvr_hdw_disconnect), the i2c adapter is not unregistered along with the USB and v4l2 teardown. As part of the USB device disconnect, the sysfs files of the subdevices are also deleted. So, by the time pvr_i2c_core_done is called by pvr_context_destroy, the sysfs files have been deleted.
To fix this, unregister the i2c adapter too in pvr_hdw_disconnect. Make the device deregistration code shared by calling pvr_hdw_disconnect from pvr2_hdw_destroy.
Reported-by: syzbot+e74a998ca8f1df9cc332@syzkaller.appspotmail.com Tested-by: syzbot+e74a998ca8f1df9cc332@syzkaller.appspotmail.com Reviewed-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Anirudh Rayabharam mail@anirudhrb.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/usb/pvrusb2/pvrusb2-hdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index f4a727918e35..d38dee1792e4 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -2676,9 +2676,8 @@ void pvr2_hdw_destroy(struct pvr2_hdw *hdw) pvr2_stream_destroy(hdw->vid_stream); hdw->vid_stream = NULL; } - pvr2_i2c_core_done(hdw); v4l2_device_unregister(&hdw->v4l2_dev); - pvr2_hdw_remove_usb_stuff(hdw); + pvr2_hdw_disconnect(hdw); mutex_lock(&pvr2_unit_mtx); do { if ((hdw->unit_number >= 0) && @@ -2705,6 +2704,7 @@ void pvr2_hdw_disconnect(struct pvr2_hdw *hdw) { pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_disconnect(hdw=%p)",hdw); LOCK_TAKE(hdw->big_lock); + pvr2_i2c_core_done(hdw); LOCK_TAKE(hdw->ctl_lock); pvr2_hdw_remove_usb_stuff(hdw); LOCK_GIVE(hdw->ctl_lock);
From: Laurent Pinchart laurent.pinchart@ideasonboard.com
[ Upstream commit d2fcc9c2de1191ea80366e3658711753738dd10a ]
The mipi_csis_events array ends with 6 non-error events, not 4. Update mipi_csis_log_counters() accordingly. While at it, log event counters in forward order, as there's no reason to log them backward.
Signed-off-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Acked-by: Rui Miguel Silva rmfrfs@gmail.com Reviewed-by: Frieder Schrempf frieder.schrempf@kontron.de Tested-by: Frieder Schrempf frieder.schrempf@kontron.de Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/media/imx/imx7-mipi-csis.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c index 025fdc488bd6..25d0f89b2e53 100644 --- a/drivers/staging/media/imx/imx7-mipi-csis.c +++ b/drivers/staging/media/imx/imx7-mipi-csis.c @@ -666,13 +666,15 @@ static void mipi_csis_clear_counters(struct csi_state *state)
static void mipi_csis_log_counters(struct csi_state *state, bool non_errors) { - int i = non_errors ? MIPI_CSIS_NUM_EVENTS : MIPI_CSIS_NUM_EVENTS - 4; + unsigned int num_events = non_errors ? MIPI_CSIS_NUM_EVENTS + : MIPI_CSIS_NUM_EVENTS - 6; struct device *dev = &state->pdev->dev; unsigned long flags; + unsigned int i;
spin_lock_irqsave(&state->slock, flags);
- for (i--; i >= 0; i--) { + for (i = 0; i < num_events; ++i) { if (state->events[i].counter > 0 || state->debug) dev_info(dev, "%s events: %d\n", state->events[i].name, state->events[i].counter);
From: Jack Xu jack.xu@intel.com
[ Upstream commit 96b57229209490c8bca4335b01a426a96173dc56 ]
Check the return code of the function qat_hal_rd_rel_reg() and return it to the caller.
This is to fix the following warning when compiling the driver with clang scan-build:
drivers/crypto/qat/qat_common/qat_hal.c:1436:2: warning: 6th function call argument is an uninitialized value
Signed-off-by: Jack Xu jack.xu@intel.com Co-developed-by: Zhehui Xiang zhehui.xiang@intel.com Signed-off-by: Zhehui Xiang zhehui.xiang@intel.com Reviewed-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qat/qat_common/qat_hal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index bd3028126cbe..069f51621f0e 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -1417,7 +1417,11 @@ static int qat_hal_put_rel_wr_xfer(struct icp_qat_fw_loader_handle *handle, pr_err("QAT: bad xfrAddr=0x%x\n", xfr_addr); return -EINVAL; } - qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval); + status = qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval); + if (status) { + pr_err("QAT: failed to read register"); + return status; + } gpr_addr = qat_hal_get_reg_addr(ICP_GPB_REL, gprnum); data16low = 0xffff & data; data16hi = 0xffff & (data >> 0x10);
From: Jack Xu jack.xu@intel.com
[ Upstream commit 9afe77cf25d9670e61b489fd52cc6f75fd7f6803 ]
Remove the unused macro ICP_DH895XCC_PESRAM_BAR_SIZE in the firmware loader.
This is to fix the following warning when compiling the driver using the clang compiler with CC=clang W=2:
drivers/crypto/qat/qat_common/qat_uclo.c:345:9: warning: macro is not used [-Wunused-macros]
Signed-off-by: Jack Xu jack.xu@intel.com Co-developed-by: Zhehui Xiang zhehui.xiang@intel.com Signed-off-by: Zhehui Xiang zhehui.xiang@intel.com Reviewed-by: Giovanni Cabiddu giovanni.cabiddu@intel.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qat/qat_common/qat_uclo.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index 1fb5fc852f6b..6d95160e451e 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -342,7 +342,6 @@ static int qat_uclo_init_umem_seg(struct icp_qat_fw_loader_handle *handle, return 0; }
-#define ICP_DH895XCC_PESRAM_BAR_SIZE 0x80000 static int qat_uclo_init_ae_memory(struct icp_qat_fw_loader_handle *handle, struct icp_qat_uof_initmem *init_mem) {
From: Thara Gopinath thara.gopinath@linaro.org
[ Upstream commit 1339a7c3ba05137a2d2fe75f602311bbfc6fab33 ]
Use the sg count returned by dma_map_sg to call into dmaengine_prep_slave_sg rather than using the original sg count. dma_map_sg can merge consecutive sglist entries, thus making the original sg count wrong. This is a fix for memory coruption issues observed while testing encryption/decryption of large messages using libkcapi framework.
Patch has been tested further by running full suite of tcrypt.ko tests including fuzz tests.
Signed-off-by: Thara Gopinath thara.gopinath@linaro.org Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/qce/skcipher.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index c0a0d8c4fce1..259418479227 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -72,7 +72,7 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) struct scatterlist *sg; bool diff_dst; gfp_t gfp; - int ret; + int dst_nents, src_nents, ret;
rctx->iv = req->iv; rctx->ivsize = crypto_skcipher_ivsize(skcipher); @@ -123,21 +123,22 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) sg_mark_end(sg); rctx->dst_sg = rctx->dst_tbl.sgl;
- ret = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); - if (ret < 0) + dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); + if (dst_nents < 0) goto error_free;
if (diff_dst) { - ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src); - if (ret < 0) + src_nents = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src); + if (src_nents < 0) goto error_unmap_dst; rctx->src_sg = req->src; } else { rctx->src_sg = rctx->dst_sg; + src_nents = dst_nents - 1; }
- ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, rctx->src_nents, - rctx->dst_sg, rctx->dst_nents, + ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, src_nents, + rctx->dst_sg, dst_nents, qce_skcipher_done, async_req); if (ret) goto error_unmap_src;
From: Hui Tang tanghui20@huawei.com
[ Upstream commit 6889fc2104e5d20899b91e61daf07a7524b2010d ]
Add a comment that p192 will fail to register in FIPS mode.
Fix ecdh-nist-p192's entry in testmgr by removing the ifdefs and not setting fips_allowed.
Signed-off-by: Hui Tang tanghui20@huawei.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- crypto/ecdh.c | 1 + crypto/testmgr.c | 3 --- crypto/testmgr.h | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-)
diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 04a427b8c956..4227d35f5485 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -179,6 +179,7 @@ static int ecdh_init(void) { int ret;
+ /* NIST p192 will fail to register in FIPS mode */ ret = crypto_register_kpp(&ecdh_nist_p192); ecdh_nist_p192_registered = ret == 0;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 10c5b3b01ec4..26e40dba9ad2 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4899,15 +4899,12 @@ static const struct alg_test_desc alg_test_descs[] = { } }, { #endif -#ifndef CONFIG_CRYPTO_FIPS .alg = "ecdh-nist-p192", .test = alg_test_kpp, - .fips_allowed = 1, .suite = { .kpp = __VECS(ecdh_p192_tv_template) } }, { -#endif .alg = "ecdh-nist-p256", .test = alg_test_kpp, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 34e4a3db3991..fe1e59da59ff 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -2685,7 +2685,6 @@ static const struct kpp_testvec curve25519_tv_template[] = { } };
-#ifndef CONFIG_CRYPTO_FIPS static const struct kpp_testvec ecdh_p192_tv_template[] = { { .secret = @@ -2725,7 +2724,6 @@ static const struct kpp_testvec ecdh_p192_tv_template[] = { .expected_ss_size = 24 } }; -#endif
static const struct kpp_testvec ecdh_p256_tv_template[] = { {
From: Hui Tang tanghui20@huawei.com
[ Upstream commit 8fd28fa5046b377039d5bbc0ab2f625dec703980 ]
NIST P192 is not unregistered if failed to register NIST P256, actually it need to unregister the algorithms already registered.
Signed-off-by: Hui Tang tanghui20@huawei.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- crypto/ecdh.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 4227d35f5485..e2c480859024 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -183,7 +183,16 @@ static int ecdh_init(void) ret = crypto_register_kpp(&ecdh_nist_p192); ecdh_nist_p192_registered = ret == 0;
- return crypto_register_kpp(&ecdh_nist_p256); + ret = crypto_register_kpp(&ecdh_nist_p256); + if (ret) + goto nist_p256_error; + + return 0; + +nist_p256_error: + if (ecdh_nist_p192_registered) + crypto_unregister_kpp(&ecdh_nist_p192); + return ret; }
static void ecdh_exit(void)
From: Tian Tao tiantao6@hisilicon.com
[ Upstream commit a5740e955540181f4ab8f076cc9795c6bbe4d730 ]
Use sysfs_emit instead of snprintf to avoid buf overrun,because in sysfs_emit it strictly checks whether buf is null or buf whether pagesize aligned, otherwise it returns an error.
Signed-off-by: Tian Tao tiantao6@hisilicon.com Link: https://lore.kernel.org/r/1621497585-30887-1-git-send-email-tiantao6@hisilic... Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f594957e29bd..44b6eda69a81 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -312,7 +312,7 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr, struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
- return snprintf(page, PAGE_SIZE, "0x%08x\n", slots); + return sysfs_emit(page, "0x%08x\n", slots); }
static DEVICE_ATTR_RO(slots);
From: Odin Ugedal odin@uged.al
[ Upstream commit 08f7c2f4d0e9f4283f5796b8168044c034a1bfcb ]
When using something other than 8 spaces per tab, this ascii art makes not sense, and the reader might end up wondering what this advanced equation "is".
Signed-off-by: Odin Ugedal odin@uged.al Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Acked-by: Vincent Guittot vincent.guittot@linaro.org Link: https://lkml.kernel.org/r/20210518125202.78658-4-odin@uged.al Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/sched/fair.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 23663318fb81..190ae8004a22 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3139,7 +3139,7 @@ void reweight_task(struct task_struct *p, int prio) * * tg->weight * grq->load.weight * ge->load.weight = ----------------------------- (1) - * \Sum grq->load.weight + * \Sum grq->load.weight * * Now, because computing that sum is prohibitively expensive to compute (been * there, done that) we approximate it with this average stuff. The average @@ -3153,7 +3153,7 @@ void reweight_task(struct task_struct *p, int prio) * * tg->weight * grq->avg.load_avg * ge->load.weight = ------------------------------ (3) - * tg->load_avg + * tg->load_avg * * Where: tg->load_avg ~= \Sum grq->avg.load_avg * @@ -3169,7 +3169,7 @@ void reweight_task(struct task_struct *p, int prio) * * tg->weight * grq->load.weight * ge->load.weight = ----------------------------- = tg->weight (4) - * grp->load.weight + * grp->load.weight * * That is, the sum collapses because all other CPUs are idle; the UP scenario. * @@ -3188,7 +3188,7 @@ void reweight_task(struct task_struct *p, int prio) * * tg->weight * grq->load.weight * ge->load.weight = ----------------------------- (6) - * tg_load_avg' + * tg_load_avg' * * Where: *
From: Roberto Sassu roberto.sassu@huawei.com
[ Upstream commit ed1b472fc15aeaa20ddeeb93fd25190014e50d17 ]
Files might come from a remote source and might have xattrs, including security.ima. It should not be IMA task to decide whether security.ima should be kept or not. This patch removes the removexattr() system call in ima_inode_post_setattr().
Signed-off-by: Roberto Sassu roberto.sassu@huawei.com Signed-off-by: Mimi Zohar zohar@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/integrity/ima/ima_appraise.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 4e5eb0236278..55dac618f2a1 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -522,8 +522,6 @@ void ima_inode_post_setattr(struct user_namespace *mnt_userns, return;
action = ima_must_appraise(mnt_userns, inode, MAY_ACCESS, POST_SETATTR); - if (!action) - __vfs_removexattr(&init_user_ns, dentry, XATTR_NAME_IMA); iint = integrity_iint_find(inode); if (iint) { set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags);
From: Dillon Min dillon.minfei@gmail.com
[ Upstream commit 24786ccd9c80fdb05494aa4d90fcb8f34295c193 ]
On some platform(imx6q), xvclk might not switch on in advance, also for power save purpose, xvclk should not be always on. so, add clk_prepare_enable(), clk_disable_unprepare() in driver side to set xvclk on/off at proper stage.
Add following changes: - add 'struct clk *clk;' in 'struct ov2659 {}' - enable xvclk in ov2659_power_on() - disable xvclk in ov2659_power_off()
Signed-off-by: Dillon Min dillon.minfei@gmail.com Acked-by: Lad Prabhakar prabhakar.csengg@gmail.com Signed-off-by: Sakari Ailus sakari.ailus@linux.intel.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/i2c/ov2659.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c index 42f64175a6df..fb78a1cedc03 100644 --- a/drivers/media/i2c/ov2659.c +++ b/drivers/media/i2c/ov2659.c @@ -204,6 +204,7 @@ struct ov2659 { struct i2c_client *client; struct v4l2_ctrl_handler ctrls; struct v4l2_ctrl *link_frequency; + struct clk *clk; const struct ov2659_framesize *frame_size; struct sensor_register *format_ctrl_regs; struct ov2659_pll_ctrl pll; @@ -1270,6 +1271,8 @@ static int ov2659_power_off(struct device *dev)
gpiod_set_value(ov2659->pwdn_gpio, 1);
+ clk_disable_unprepare(ov2659->clk); + return 0; }
@@ -1278,9 +1281,17 @@ static int ov2659_power_on(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct v4l2_subdev *sd = i2c_get_clientdata(client); struct ov2659 *ov2659 = to_ov2659(sd); + int ret;
dev_dbg(&client->dev, "%s:\n", __func__);
+ ret = clk_prepare_enable(ov2659->clk); + if (ret) { + dev_err(&client->dev, "%s: failed to enable clock\n", + __func__); + return ret; + } + gpiod_set_value(ov2659->pwdn_gpio, 0);
if (ov2659->resetb_gpio) { @@ -1425,7 +1436,6 @@ static int ov2659_probe(struct i2c_client *client) const struct ov2659_platform_data *pdata = ov2659_get_pdata(client); struct v4l2_subdev *sd; struct ov2659 *ov2659; - struct clk *clk; int ret;
if (!pdata) { @@ -1440,11 +1450,11 @@ static int ov2659_probe(struct i2c_client *client) ov2659->pdata = pdata; ov2659->client = client;
- clk = devm_clk_get(&client->dev, "xvclk"); - if (IS_ERR(clk)) - return PTR_ERR(clk); + ov2659->clk = devm_clk_get(&client->dev, "xvclk"); + if (IS_ERR(ov2659->clk)) + return PTR_ERR(ov2659->clk);
- ov2659->xvclk_frequency = clk_get_rate(clk); + ov2659->xvclk_frequency = clk_get_rate(ov2659->clk); if (ov2659->xvclk_frequency < 6000000 || ov2659->xvclk_frequency > 27000000) return -EINVAL; @@ -1506,7 +1516,9 @@ static int ov2659_probe(struct i2c_client *client) ov2659->frame_size = &ov2659_framesizes[2]; ov2659->format_ctrl_regs = ov2659_formats[0].format_ctrl_regs;
- ov2659_power_on(&client->dev); + ret = ov2659_power_on(&client->dev); + if (ret < 0) + goto error;
ret = ov2659_detect(sd); if (ret < 0)
From: Tong Zhang ztong0001@gmail.com
[ Upstream commit a3a54bf4bddaecda8b5767209cfc703f0be2841d ]
There is a problem with the tasklet in bt878. bt->tasklet is set by dvb-bt8xx.ko, and bt878.ko can be loaded independently. In this case if interrupt comes it may cause null-ptr-dereference. To solve this issue, we check if the tasklet is actually set before calling tasklet_schedule.
[ 1.750438] bt878(0): irq FDSR FBUS risc_pc= [ 1.750728] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 1.752969] RIP: 0010:0x0 [ 1.757526] Call Trace: [ 1.757659] <IRQ> [ 1.757770] tasklet_action_common.isra.0+0x107/0x110 [ 1.758041] tasklet_action+0x22/0x30 [ 1.758237] __do_softirq+0xe0/0x29b [ 1.758430] irq_exit_rcu+0xa4/0xb0 [ 1.758618] common_interrupt+0x8d/0xa0 [ 1.758824] </IRQ>
Signed-off-by: Tong Zhang ztong0001@gmail.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/pci/bt8xx/bt878.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/media/pci/bt8xx/bt878.c b/drivers/media/pci/bt8xx/bt878.c index 78dd35c9b65d..7ca309121fb5 100644 --- a/drivers/media/pci/bt8xx/bt878.c +++ b/drivers/media/pci/bt8xx/bt878.c @@ -300,7 +300,8 @@ static irqreturn_t bt878_irq(int irq, void *dev_id) } if (astat & BT878_ARISCI) { bt->finished_block = (stat & BT878_ARISCS) >> 28; - tasklet_schedule(&bt->tasklet); + if (bt->tasklet.callback) + tasklet_schedule(&bt->tasklet); break; } count++;
From: Igor Matheus Andrade Torrente igormtorrente@gmail.com
[ Upstream commit ac5688637144644f06ed1f3c6d4dd8bb7db96020 ]
The em28xx struct kref isn't being decreased after an error in the em28xx_ir_init, leading to a possible memory leak.
A kref_put and em28xx_shutdown_buttons is added to the error handler code.
Signed-off-by: Igor Matheus Andrade Torrente igormtorrente@gmail.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/usb/em28xx/em28xx-input.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/media/usb/em28xx/em28xx-input.c b/drivers/media/usb/em28xx/em28xx-input.c index 5aa15a7a49de..59529cbf9cd0 100644 --- a/drivers/media/usb/em28xx/em28xx-input.c +++ b/drivers/media/usb/em28xx/em28xx-input.c @@ -720,7 +720,8 @@ static int em28xx_ir_init(struct em28xx *dev) dev->board.has_ir_i2c = 0; dev_warn(&dev->intf->dev, "No i2c IR remote control device found.\n"); - return -ENODEV; + err = -ENODEV; + goto ref_put; } }
@@ -735,7 +736,7 @@ static int em28xx_ir_init(struct em28xx *dev)
ir = kzalloc(sizeof(*ir), GFP_KERNEL); if (!ir) - return -ENOMEM; + goto ref_put; rc = rc_allocate_device(RC_DRIVER_SCANCODE); if (!rc) goto error; @@ -839,6 +840,9 @@ static int em28xx_ir_init(struct em28xx *dev) dev->ir = NULL; rc_free_device(rc); kfree(ir); +ref_put: + em28xx_shutdown_buttons(dev); + kref_put(&dev->ref, em28xx_free_device); return err; }
From: Andrzej Pietrasiewicz andrzej.p@collabora.com
[ Upstream commit 082aaecff35fbe1937531057911b1dd1fc6b496e ]
The driver should only set the payload on .buf_prepare if the buffer is CAPTURE type. If an OUTPUT buffer has a zero bytesused set by userspace then v4l2-core will set it to buffer length.
If we overwrite bytesused for OUTPUT buffers, too, then vb2_get_plane_payload() will return incorrect value which might be then written to hw registers by the driver in hantro_g1_h264_dec.c.
Signed-off-by: Andrzej Pietrasiewicz andrzej.p@collabora.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/media/hantro/hantro_v4l2.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c index 1bc118e375a1..7ccc6405036a 100644 --- a/drivers/staging/media/hantro/hantro_v4l2.c +++ b/drivers/staging/media/hantro/hantro_v4l2.c @@ -639,7 +639,14 @@ static int hantro_buf_prepare(struct vb2_buffer *vb) ret = hantro_buf_plane_check(vb, pix_fmt); if (ret) return ret; - vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage); + /* + * Buffer's bytesused must be written by driver for CAPTURE buffers. + * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets + * it to buffer length). + */ + if (V4L2_TYPE_IS_CAPTURE(vq->type)) + vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage); + return 0; }
From: Andrzej Pietrasiewicz andrzej.p@collabora.com
[ Upstream commit d84b9202d712309840f8b5abee0ed272506563bd ]
The driver should only set the payload on .buf_prepare if the buffer is CAPTURE type. If an OUTPUT buffer has a zero bytesused set by userspace then v4l2-core will set it to buffer length.
If we overwrite bytesused for OUTPUT buffers, too, then vb2_get_plane_payload() will return incorrect value which might be then written to hw registers by the driver in cedrus_h264.c or cedrus_vp8.c.
Signed-off-by: Andrzej Pietrasiewicz andrzej.p@collabora.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/media/sunxi/cedrus/cedrus_video.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c index b62eb8e84057..bf731caf2ed5 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c @@ -457,7 +457,13 @@ static int cedrus_buf_prepare(struct vb2_buffer *vb) if (vb2_plane_size(vb, 0) < pix_fmt->sizeimage) return -EINVAL;
- vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage); + /* + * Buffer's bytesused must be written by driver for CAPTURE buffers. + * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets + * it to buffer length). + */ + if (V4L2_TYPE_IS_CAPTURE(vq->type)) + vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage);
return 0; }
From: Lv Yunlong lyl2019@mail.ustc.edu.cn
[ Upstream commit 7dd0c9e547b6924e18712b6b51aa3cba1896ee2c ]
A use after free bug caused by the dangling pointer filp->privitate_data in v4l2_fh_release. See https://lore.kernel.org/patchwork/patch/1419058/.
My patch sets the dangling pointer to NULL to provide robust.
Signed-off-by: Lv Yunlong lyl2019@mail.ustc.edu.cn Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/v4l2-core/v4l2-fh.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/media/v4l2-core/v4l2-fh.c b/drivers/media/v4l2-core/v4l2-fh.c index 684574f58e82..90eec79ee995 100644 --- a/drivers/media/v4l2-core/v4l2-fh.c +++ b/drivers/media/v4l2-core/v4l2-fh.c @@ -96,6 +96,7 @@ int v4l2_fh_release(struct file *filp) v4l2_fh_del(fh); v4l2_fh_exit(fh); kfree(fh); + filp->private_data = NULL; } return 0; }
From: Zheyu Ma zheyuma97@gmail.com
[ Upstream commit 1a4520090681853e6b850cbe54b27247a013e0e5 ]
In 'bt878_irq', the driver calls 'tasklet_schedule', but this tasklet is set in 'dvb_bt8xx_load_card' of another driver 'dvb-bt8xx'. However, this two drivers are separate. The user may not load the 'dvb-bt8xx' driver when loading the 'bt8xx' driver, that is, the tasklet has not been initialized when 'tasklet_schedule' is called, so it is necessary to check whether the tasklet is initialized in 'bt878_probe'.
Fix this by adding a check at the end of bt878_probe.
The KASAN's report reveals it:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 800000006aab2067 P4D 800000006aab2067 PUD 6b2ea067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN PTI CPU: 2 PID: 8724 Comm: syz-executor.0 Not tainted 4.19.177- gdba4159c14ef-dirty #40 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59- gc9ba5276e321-prebuilt.qemu.org 04/01/2014 RIP: 0010: (null) Code: Bad RIP value. RSP: 0018:ffff88806c287ea0 EFLAGS: 00010246 RAX: fffffbfff1b01774 RBX: dffffc0000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 1ffffffff1b01775 RDI: 0000000000000000 RBP: ffff88806c287f00 R08: fffffbfff1b01774 R09: fffffbfff1b01774 R10: 0000000000000001 R11: fffffbfff1b01773 R12: 0000000000000000 R13: ffff88806c29f530 R14: ffffffff8d80bb88 R15: ffffffff8d80bb90 FS: 00007f6b550e6700(0000) GS:ffff88806c280000(0000) knlGS: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 000000005ec98000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <IRQ> tasklet_action_common.isra.17+0x141/0x420 kernel/softirq.c:522 tasklet_action+0x50/0x70 kernel/softirq.c:540 __do_softirq+0x224/0x92c kernel/softirq.c:292 invoke_softirq kernel/softirq.c:372 [inline] irq_exit+0x15a/0x180 kernel/softirq.c:412 exiting_irq arch/x86/include/asm/apic.h:535 [inline] do_IRQ+0x123/0x1e0 arch/x86/kernel/irq.c:260 common_interrupt+0xf/0xf arch/x86/entry/entry_64.S:670 </IRQ> RIP: 0010:__do_sys_interrupt kernel/sys.c:2593 [inline] RIP: 0010:__se_sys_interrupt kernel/sys.c:2584 [inline] RIP: 0010:__x64_sys_interrupt+0x5b/0x80 kernel/sys.c:2584 Code: ba 00 04 00 00 48 c7 c7 c0 99 31 8c e8 ae 76 5e 01 48 85 c0 75 21 e8 14 ae 24 00 48 c7 c3 c0 99 31 8c b8 0c 00 00 00 0f 01 c1 <31> db e8 fe ad 24 00 48 89 d8 5b 5d c3 48 c7 c3 ea ff ff ff eb ec RSP: 0018:ffff888054167f10 EFLAGS: 00000212 ORIG_RAX: ffffffffffffffde RAX: 000000000000000c RBX: ffffffff8c3199c0 RCX: ffffc90001ca6000 RDX: 000000000000001a RSI: ffffffff813478fc RDI: ffffffff8c319dc0 RBP: ffff888054167f18 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000080 R11: fffffbfff18633b7 R12: ffff888054167f58 R13: ffff88805f638000 R14: 0000000000000000 R15: 0000000000000000 do_syscall_64+0xb0/0x4e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4692a9 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6b550e5c48 EFLAGS: 00000246 ORIG_RAX: 000000000000014f RAX: ffffffffffffffda RBX: 000000000077bf60 RCX: 00000000004692a9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000140 RBP: 00000000004cf7eb R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000077bf60 R13: 0000000000000000 R14: 000000000077bf60 R15: 00007fff55a1dca0 Modules linked in: Dumping ftrace buffer: (ftrace buffer empty) CR2: 0000000000000000 ---[ end trace 68e5849c3f77cbb6 ]--- RIP: 0010: (null) Code: Bad RIP value. RSP: 0018:ffff88806c287ea0 EFLAGS: 00010246 RAX: fffffbfff1b01774 RBX: dffffc0000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 1ffffffff1b01775 RDI: 0000000000000000 RBP: ffff88806c287f00 R08: fffffbfff1b01774 R09: fffffbfff1b01774 R10: 0000000000000001 R11: fffffbfff1b01773 R12: 0000000000000000 R13: ffff88806c29f530 R14: ffffffff8d80bb88 R15: ffffffff8d80bb90 FS: 00007f6b550e6700(0000) GS:ffff88806c280000(0000) knlGS: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 000000005ec98000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Reported-by: Zheyu Ma zheyuma97@gmail.com Signed-off-by: Zheyu Ma zheyuma97@gmail.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/pci/bt8xx/bt878.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/media/pci/bt8xx/bt878.c b/drivers/media/pci/bt8xx/bt878.c index 7ca309121fb5..90972d6952f1 100644 --- a/drivers/media/pci/bt8xx/bt878.c +++ b/drivers/media/pci/bt8xx/bt878.c @@ -478,6 +478,9 @@ static int bt878_probe(struct pci_dev *dev, const struct pci_device_id *pci_id) btwrite(0, BT878_AINT_MASK); bt878_num++;
+ if (!bt->tasklet.func) + tasklet_disable(&bt->tasklet); + return 0;
fail2:
From: Evgeny Novikov novikov@ispras.ru
[ Upstream commit b7fdd208687ba59ebfb09b2199596471c63b69e3 ]
When ctx_id >= HVA_MAX_INSTANCES in hva_hw_its_irq_thread() it tries to access fields of ctx that is NULL at that point. The patch gets rid of these accesses.
Found by Linux Driver Verification project (linuxtesting.org).
Signed-off-by: Evgeny Novikov novikov@ispras.ru Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/sti/hva/hva-hw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/media/platform/sti/hva/hva-hw.c b/drivers/media/platform/sti/hva/hva-hw.c index f59811e27f51..6eeee5017fac 100644 --- a/drivers/media/platform/sti/hva/hva-hw.c +++ b/drivers/media/platform/sti/hva/hva-hw.c @@ -130,8 +130,7 @@ static irqreturn_t hva_hw_its_irq_thread(int irq, void *arg) ctx_id = (hva->sts_reg & 0xFF00) >> 8; if (ctx_id >= HVA_MAX_INSTANCES) { dev_err(dev, "%s %s: bad context identifier: %d\n", - ctx->name, __func__, ctx_id); - ctx->hw_err = true; + HVA_PREFIX, __func__, ctx_id); goto out; }
From: Kai Ye yekai13@huawei.com
[ Upstream commit 6161f40c630bd7ced5f236cd5fbabec06e47afae ]
Fixup the 3des algorithm minimum key size declaration.
Signed-off-by: Kai Ye yekai13@huawei.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 133aede8bf07..b43fad8b9e8d 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -1541,11 +1541,11 @@ static struct skcipher_alg sec_skciphers[] = { AES_BLOCK_SIZE, AES_BLOCK_SIZE)
SEC_SKCIPHER_ALG("ecb(des3_ede)", sec_setkey_3des_ecb, - SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE, + SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE, DES3_EDE_BLOCK_SIZE, 0)
SEC_SKCIPHER_ALG("cbc(des3_ede)", sec_setkey_3des_cbc, - SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE, + SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE, DES3_EDE_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE)
SEC_SKCIPHER_ALG("xts(sm4)", sec_setkey_sm4_xts,
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit bf6fa2c0dda751863c3446aa64d733013bec4a19 ]
The code in entry-common.c runs at exception entry and return boundaries, where portions of the kernel environment aren't available. For example, RCU may not be watching, and lockdep state may be out-of-sync with the hardware. Due to this, it is not sound to instrument this code.
We generally avoid instrumentation by marking the entry functions as `noinstr`, but currently this doesn't inhibit KCOV instrumentation. Prevent this by disabling KCOV for the entire compilation unit.
Signed-off-by: Mark Rutland mark.rutland@arm.com Acked-by: Catalin Marinas catalin.marinas@arm.com Acked-by: Marc Zyngier maz@kernel.org Cc: James Morse james.morse@arm.com Cc: Will Deacon will@kernel.org Link: https://lore.kernel.org/r/20210607094624.34689-20-mark.rutland@arm.com Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/kernel/Makefile | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 6cc97730790e..787c3c83edd7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_syscall.o += -fno-stack-protector
+# It's not safe to invoke KCOV when portions of the kernel environment aren't +# available or are out-of-sync with HW state. Since `noinstr` doesn't always +# inhibit KCOV instrumentation, disable it for the entire compilation unit. +KCOV_INSTRUMENT_entry.o := n + # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-common.o entry-fpsimd.o process.o ptrace.o \
From: Nick Desaulniers ndesaulniers@google.com
[ Upstream commit 27f2a4db76e8d8a8b601fc1c6a7a17f88bd907ab ]
GDB produces the following warning when debugging kernels built with CONFIG_RELR:
BFD: /android0/linux-next/vmlinux: unknown type [0x13] section `.relr.dyn'
when loading a kernel built with CONFIG_RELR into GDB. It can also prevent debugging symbols using such relocations.
Peter sugguests: [That flag] means that lld will use dynamic tags and section type numbers in the OS-specific range rather than the generic range. The kernel itself doesn't care about these numbers; it determines the location of the RELR section using symbols defined by a linker script.
Link: https://github.com/ClangBuiltLinux/linux/issues/1057 Suggested-by: Peter Collingbourne pcc@google.com Reviewed-by: Nathan Chancellor nathan@kernel.org Signed-off-by: Nick Desaulniers ndesaulniers@google.com Link: https://lore.kernel.org/r/20210522012626.2811297-1-ndesaulniers@google.com Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- Makefile | 2 +- scripts/tools-support-relr.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile index 0565caea0362..88888fff4c62 100644 --- a/Makefile +++ b/Makefile @@ -1039,7 +1039,7 @@ LDFLAGS_vmlinux += $(call ld-option, -X,) endif
ifeq ($(CONFIG_RELR),y) -LDFLAGS_vmlinux += --pack-dyn-relocs=relr +LDFLAGS_vmlinux += --pack-dyn-relocs=relr --use-android-relr-tags endif
# We never want expected sections to be placed heuristically by the diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh index 45e8aa360b45..cb55878bd5b8 100755 --- a/scripts/tools-support-relr.sh +++ b/scripts/tools-support-relr.sh @@ -7,7 +7,8 @@ trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1 void *p = &p; END -$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file +$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr \ + --use-android-relr-tags -o $tmp_file
# Despite printing an error message, GNU nm still exits with exit code 0 if it # sees a relr section. So we need to check that nothing is printed to stderr.
From: Dongliang Mu mudongliangabcd@gmail.com
[ Upstream commit 9ad1efee086e0e913914fa2b2173efb830bad68c ]
When the driver fails to talk with the hardware with dvb_usb_generic_rw, it will return an error to dvb_usb_adapter_frontend_init. However, the driver forgets to free the resource (e.g., struct cinergyt2_fe_state), which leads to a memory leak.
Fix this by freeing struct cinergyt2_fe_state when dvb_usb_generic_rw fails in cinergyt2_frontend_attach.
backtrace: [<0000000056e17b1a>] kmalloc include/linux/slab.h:552 [inline] [<0000000056e17b1a>] kzalloc include/linux/slab.h:682 [inline] [<0000000056e17b1a>] cinergyt2_fe_attach+0x21/0x80 drivers/media/usb/dvb-usb/cinergyT2-fe.c:271 [<00000000ae0b1711>] cinergyt2_frontend_attach+0x21/0x70 drivers/media/usb/dvb-usb/cinergyT2-core.c:74 [<00000000d0254861>] dvb_usb_adapter_frontend_init+0x11b/0x1b0 drivers/media/usb/dvb-usb/dvb-usb-dvb.c:290 [<0000000002e08ac6>] dvb_usb_adapter_init drivers/media/usb/dvb-usb/dvb-usb-init.c:84 [inline] [<0000000002e08ac6>] dvb_usb_init drivers/media/usb/dvb-usb/dvb-usb-init.c:173 [inline] [<0000000002e08ac6>] dvb_usb_device_init.cold+0x4d0/0x6ae drivers/media/usb/dvb-usb/dvb-usb-init.c:287
Reported-by: syzbot+e1de8986786b3722050e@syzkaller.appspotmail.com Signed-off-by: Dongliang Mu mudongliangabcd@gmail.com Signed-off-by: Sean Young sean@mess.org Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/usb/dvb-usb/cinergyT2-core.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/media/usb/dvb-usb/cinergyT2-core.c b/drivers/media/usb/dvb-usb/cinergyT2-core.c index 969a7ec71dff..4116ba5c45fc 100644 --- a/drivers/media/usb/dvb-usb/cinergyT2-core.c +++ b/drivers/media/usb/dvb-usb/cinergyT2-core.c @@ -78,6 +78,8 @@ static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap)
ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 3, 0); if (ret < 0) { + if (adap->fe_adap[0].fe) + adap->fe_adap[0].fe->ops.release(adap->fe_adap[0].fe); deb_rc("cinergyt2_power_ctrl() Failed to retrieve sleep state info\n"); } mutex_unlock(&d->data_mutex);
From: Tong Zhang ztong0001@gmail.com
[ Upstream commit 42933c8aa14be1caa9eda41f65cde8a3a95d3e39 ]
This patch fixes the following issues: 1. memstick_free_host() will free the host, so the use of ms_dev(host) after it will be a problem. To fix this, move memstick_free_host() after when we are done with ms_dev(host). 2. In rtsx_usb_ms_drv_remove(), pm need to be disabled before we remove and free host otherwise memstick_check will be called and UAF will happen.
[ 11.351173] BUG: KASAN: use-after-free in rtsx_usb_ms_drv_remove+0x94/0x140 [rtsx_usb_ms] [ 11.357077] rtsx_usb_ms_drv_remove+0x94/0x140 [rtsx_usb_ms] [ 11.357376] platform_remove+0x2a/0x50 [ 11.367531] Freed by task 298: [ 11.368537] kfree+0xa4/0x2a0 [ 11.368711] device_release+0x51/0xe0 [ 11.368905] kobject_put+0xa2/0x120 [ 11.369090] rtsx_usb_ms_drv_remove+0x8c/0x140 [rtsx_usb_ms] [ 11.369386] platform_remove+0x2a/0x50
[ 12.038408] BUG: KASAN: use-after-free in __mutex_lock.isra.0+0x3ec/0x7c0 [ 12.045432] mutex_lock+0xc9/0xd0 [ 12.046080] memstick_check+0x6a/0x578 [memstick] [ 12.046509] process_one_work+0x46d/0x750 [ 12.052107] Freed by task 297: [ 12.053115] kfree+0xa4/0x2a0 [ 12.053272] device_release+0x51/0xe0 [ 12.053463] kobject_put+0xa2/0x120 [ 12.053647] rtsx_usb_ms_drv_remove+0xc4/0x140 [rtsx_usb_ms] [ 12.053939] platform_remove+0x2a/0x50
Signed-off-by: Tong Zhang ztong0001@gmail.com Co-developed-by: Ulf Hansson ulf.hansson@linaro.org Link: https://lore.kernel.org/r/20210511163944.1233295-1-ztong0001@gmail.com Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/memstick/host/rtsx_usb_ms.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 102dbb8080da..29271ad4728a 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -799,9 +799,9 @@ static int rtsx_usb_ms_drv_probe(struct platform_device *pdev)
return 0; err_out: - memstick_free_host(msh); pm_runtime_disable(ms_dev(host)); pm_runtime_put_noidle(ms_dev(host)); + memstick_free_host(msh); return err; }
@@ -828,9 +828,6 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev) } mutex_unlock(&host->host_mutex);
- memstick_remove_host(msh); - memstick_free_host(msh); - /* Balance possible unbalanced usage count * e.g. unconditional module removal */ @@ -838,10 +835,11 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev) pm_runtime_put(ms_dev(host));
pm_runtime_disable(ms_dev(host)); - platform_set_drvdata(pdev, NULL); - + memstick_remove_host(msh); dev_dbg(ms_dev(host), ": Realtek USB Memstick controller has been removed\n"); + memstick_free_host(msh); + platform_set_drvdata(pdev, NULL);
return 0; }
From: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com
[ Upstream commit 961470820021e6f9d74db4837bd6831a1a30341b ]
The sdhci_sprd_writew() was defined by never used in sdhci_ops:
drivers/mmc/host/sdhci-sprd.c:134:20: warning: unused function 'sdhci_sprd_writew'
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com Link: https://lore.kernel.org/r/20210601095403.236007-2-krzysztof.kozlowski@canoni... Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/mmc/host/sdhci-sprd.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 5dc36efff47f..11e375579cfb 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -393,6 +393,7 @@ static void sdhci_sprd_request_done(struct sdhci_host *host, static struct sdhci_ops sdhci_sprd_ops = { .read_l = sdhci_sprd_readl, .write_l = sdhci_sprd_writel, + .write_w = sdhci_sprd_writew, .write_b = sdhci_sprd_writeb, .set_clock = sdhci_sprd_set_clock, .get_max_clock = sdhci_sprd_get_max_clock,
From: Zheyu Ma zheyuma97@gmail.com
[ Upstream commit 45c8ddd06c4b729c56a6083ab311bfbd9643f4a6 ]
Before referencing 'host->data', the driver needs to check whether it is null pointer, otherwise it will cause a null pointer reference.
This log reveals it:
[ 29.355199] BUG: kernel NULL pointer dereference, address: 0000000000000014 [ 29.357323] #PF: supervisor write access in kernel mode [ 29.357706] #PF: error_code(0x0002) - not-present page [ 29.358088] PGD 0 P4D 0 [ 29.358280] Oops: 0002 [#1] PREEMPT SMP PTI [ 29.358595] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.12.4- g70e7f0549188-dirty #102 [ 29.359164] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 29.359978] RIP: 0010:via_sdc_isr+0x21f/0x410 [ 29.360314] Code: ff ff e8 84 aa d0 fd 66 45 89 7e 28 66 41 f7 c4 00 10 75 56 e8 72 aa d0 fd 66 41 f7 c4 00 c0 74 10 e8 65 aa d0 fd 48 8b 43 18 <c7> 40 14 ac ff ff ff e8 55 aa d0 fd 48 89 df e8 ad fb ff ff e9 77 [ 29.361661] RSP: 0018:ffffc90000118e98 EFLAGS: 00010046 [ 29.362042] RAX: 0000000000000000 RBX: ffff888107d77880 RCX: 0000000000000000 [ 29.362564] RDX: 0000000000000000 RSI: ffffffff835d20bb RDI: 00000000ffffffff [ 29.363085] RBP: ffffc90000118ed8 R08: 0000000000000001 R09: 0000000000000001 [ 29.363604] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000008600 [ 29.364128] R13: ffff888107d779c8 R14: ffffc90009c00200 R15: 0000000000008000 [ 29.364651] FS: 0000000000000000(0000) GS:ffff88817bc80000(0000) knlGS:0000000000000000 [ 29.365235] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.365655] CR2: 0000000000000014 CR3: 0000000005a2e000 CR4: 00000000000006e0 [ 29.366170] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 29.366683] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 29.367197] Call Trace: [ 29.367381] <IRQ> [ 29.367537] __handle_irq_event_percpu+0x53/0x3e0 [ 29.367916] handle_irq_event_percpu+0x35/0x90 [ 29.368247] handle_irq_event+0x39/0x60 [ 29.368632] handle_fasteoi_irq+0xc2/0x1d0 [ 29.368950] __common_interrupt+0x7f/0x150 [ 29.369254] common_interrupt+0xb4/0xd0 [ 29.369547] </IRQ> [ 29.369708] asm_common_interrupt+0x1e/0x40 [ 29.370016] RIP: 0010:native_safe_halt+0x17/0x20 [ 29.370360] Code: 07 0f 00 2d db 80 43 00 f4 5d c3 0f 1f 84 00 00 00 00 00 8b 05 c2 37 e5 01 55 48 89 e5 85 c0 7e 07 0f 00 2d bb 80 43 00 fb f4 <5d> c3 cc cc cc cc cc cc cc 55 48 89 e5 e8 67 53 ff ff 8b 0d f9 91 [ 29.371696] RSP: 0018:ffffc9000008fe90 EFLAGS: 00000246 [ 29.372079] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000000 [ 29.372595] RDX: 0000000000000000 RSI: ffffffff854f67a4 RDI: ffffffff85403406 [ 29.373122] RBP: ffffc9000008fe90 R08: 0000000000000001 R09: 0000000000000001 [ 29.373646] R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff86009188 [ 29.374160] R13: 0000000000000000 R14: 0000000000000000 R15: ffff888100258000 [ 29.374690] default_idle+0x9/0x10 [ 29.374944] arch_cpu_idle+0xa/0x10 [ 29.375198] default_idle_call+0x6e/0x250 [ 29.375491] do_idle+0x1f0/0x2d0 [ 29.375740] cpu_startup_entry+0x18/0x20 [ 29.376034] start_secondary+0x11f/0x160 [ 29.376328] secondary_startup_64_no_verify+0xb0/0xbb [ 29.376705] Modules linked in: [ 29.376939] Dumping ftrace buffer: [ 29.377187] (ftrace buffer empty) [ 29.377460] CR2: 0000000000000014 [ 29.377712] ---[ end trace 51a473dffb618c47 ]--- [ 29.378056] RIP: 0010:via_sdc_isr+0x21f/0x410 [ 29.378380] Code: ff ff e8 84 aa d0 fd 66 45 89 7e 28 66 41 f7 c4 00 10 75 56 e8 72 aa d0 fd 66 41 f7 c4 00 c0 74 10 e8 65 aa d0 fd 48 8b 43 18 <c7> 40 14 ac ff ff ff e8 55 aa d0 fd 48 89 df e8 ad fb ff ff e9 77 [ 29.379714] RSP: 0018:ffffc90000118e98 EFLAGS: 00010046 [ 29.380098] RAX: 0000000000000000 RBX: ffff888107d77880 RCX: 0000000000000000 [ 29.380614] RDX: 0000000000000000 RSI: ffffffff835d20bb RDI: 00000000ffffffff [ 29.381134] RBP: ffffc90000118ed8 R08: 0000000000000001 R09: 0000000000000001 [ 29.381653] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000008600 [ 29.382176] R13: ffff888107d779c8 R14: ffffc90009c00200 R15: 0000000000008000 [ 29.382697] FS: 0000000000000000(0000) GS:ffff88817bc80000(0000) knlGS:0000000000000000 [ 29.383277] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.383697] CR2: 0000000000000014 CR3: 0000000005a2e000 CR4: 00000000000006e0 [ 29.384223] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 29.384736] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 29.385260] Kernel panic - not syncing: Fatal exception in interrupt [ 29.385882] Dumping ftrace buffer: [ 29.386135] (ftrace buffer empty) [ 29.386401] Kernel Offset: disabled [ 29.386656] Rebooting in 1 seconds..
Signed-off-by: Zheyu Ma zheyuma97@gmail.com Link: https://lore.kernel.org/r/1622727200-15808-1-git-send-email-zheyuma97@gmail.... Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/mmc/host/via-sdmmc.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c index a1d098560099..c32df5530b94 100644 --- a/drivers/mmc/host/via-sdmmc.c +++ b/drivers/mmc/host/via-sdmmc.c @@ -857,6 +857,9 @@ static void via_sdc_data_isr(struct via_crdr_mmc_host *host, u16 intmask) { BUG_ON(intmask == 0);
+ if (!host->data) + return; + if (intmask & VIA_CRDR_SDSTS_DT) host->data->error = -ETIMEDOUT; else if (intmask & (VIA_CRDR_SDSTS_RC | VIA_CRDR_SDSTS_WC))
From: Andrew Jeffery andrew@aj.id.au
[ Upstream commit a7ab186f60785850b5af1be183867000485ad491 ]
The card timing and the bus frequency are not changed atomically with respect to calls to the set_clock() callback in the driver. The result is the driver sees a transient state where there's a mismatch between the two and thus the inputs to the phase correction calculation formula are garbage.
Switch from dev_warn() to dev_dbg() to avoid noise in the normal case, though the change does make bad configurations less likely to be noticed.
Reported-by: Joel Stanley joel@jms.id.au Signed-off-by: Andrew Jeffery andrew@aj.id.au Reviewed-by: Joel Stanley joel@jms.id.au Link: https://lore.kernel.org/r/20210607013020.85885-1-andrew@aj.id.au Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/mmc/host/sdhci-of-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mmc/host/sdhci-of-aspeed.c b/drivers/mmc/host/sdhci-of-aspeed.c index d001c51074a0..e4665a438ec5 100644 --- a/drivers/mmc/host/sdhci-of-aspeed.c +++ b/drivers/mmc/host/sdhci-of-aspeed.c @@ -150,7 +150,7 @@ static int aspeed_sdhci_phase_to_tap(struct device *dev, unsigned long rate_hz,
tap = div_u64(phase_period_ps, prop_delay_ps); if (tap > ASPEED_SDHCI_NR_TAPS) { - dev_warn(dev, + dev_dbg(dev, "Requested out of range phase tap %d for %d degrees of phase compensation at %luHz, clamping to tap %d\n", tap, phase_deg, rate_hz, ASPEED_SDHCI_NR_TAPS); tap = ASPEED_SDHCI_NR_TAPS;
From: zpershuai zpershuai@gmail.com
[ Upstream commit 95730d5eb73170a6d225a9998c478be273598634 ]
In meson_spifc_probe function, when enable the device pclk clock is error, it should use clk_disable_unprepare to release the core clock.
Signed-off-by: zpershuai zpershuai@gmail.com Reviewed-by: Neil Armstrong narmstrong@baylibre.com Link: https://lore.kernel.org/r/1623562172-22056-1-git-send-email-zpershuai@gmail.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi-meson-spicc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index ecba6b4a5d85..51aef2c6e966 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -725,7 +725,7 @@ static int meson_spicc_probe(struct platform_device *pdev) ret = clk_prepare_enable(spicc->pclk); if (ret) { dev_err(&pdev->dev, "pclk clock enable failed\n"); - goto out_master; + goto out_core_clk; }
device_reset_optional(&pdev->dev); @@ -764,9 +764,11 @@ static int meson_spicc_probe(struct platform_device *pdev) return 0;
out_clk: - clk_disable_unprepare(spicc->core); clk_disable_unprepare(spicc->pclk);
+out_core_clk: + clk_disable_unprepare(spicc->core); + out_master: spi_master_put(master);
From: zpershuai zpershuai@gmail.com
[ Upstream commit b2d501c13470409ee7613855b17e5e5ec4111e1c ]
when meson_spicc_clk_init returns failed, it should goto the out_clk label.
Signed-off-by: zpershuai zpershuai@gmail.com Reviewed-by: Neil Armstrong narmstrong@baylibre.com Link: https://lore.kernel.org/r/1623562156-21995-1-git-send-email-zpershuai@gmail.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi-meson-spicc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index 51aef2c6e966..b2c4621db34d 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -752,7 +752,7 @@ static int meson_spicc_probe(struct platform_device *pdev) ret = meson_spicc_clk_init(spicc); if (ret) { dev_err(&pdev->dev, "clock registration failed\n"); - goto out_master; + goto out_clk; }
ret = devm_spi_register_master(&pdev->dev, master);
From: Axel Lin axel.lin@ingics.com
[ Upstream commit 70d654ea3de937d7754c107bb8eeb20e30262c89 ]
devm_regmap_init_spmi_ext() returns ERR_PTR() on error.
Signed-off-by: Axel Lin axel.lin@ingics.com Link: https://lore.kernel.org/r/20210615132934.3453965-1-axel.lin@ingics.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/regulator/mt6315-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/regulator/mt6315-regulator.c b/drivers/regulator/mt6315-regulator.c index 6b8be52c3772..7514702f78cf 100644 --- a/drivers/regulator/mt6315-regulator.c +++ b/drivers/regulator/mt6315-regulator.c @@ -223,8 +223,8 @@ static int mt6315_regulator_probe(struct spmi_device *pdev) int i;
regmap = devm_regmap_init_spmi_ext(pdev, &mt6315_regmap_config); - if (!regmap) - return -ENODEV; + if (IS_ERR(regmap)) + return PTR_ERR(regmap);
chip = devm_kzalloc(dev, sizeof(struct mt6315_chip), GFP_KERNEL); if (!chip)
From: Ard Biesheuvel ardb@kernel.org
[ Upstream commit 22ca9f4aaf431a9413dcc115dd590123307f274f ]
crypto_shash_alg_has_setkey() is implemented by testing whether the .setkey() member of a struct shash_alg points to the default version, called shash_no_setkey(). As crypto_shash_alg_has_setkey() is a static inline, this requires shash_no_setkey() to be exported to modules.
Unfortunately, when building with CFI, function pointers are routed via CFI stubs which are private to each module (or to the kernel proper) and so this function pointer comparison may fail spuriously.
Let's fix this by turning crypto_shash_alg_has_setkey() into an out of line function.
Cc: Sami Tolvanen samitolvanen@google.com Cc: Eric Biggers ebiggers@kernel.org Signed-off-by: Ard Biesheuvel ardb@kernel.org Reviewed-by: Eric Biggers ebiggers@google.com Reviewed-by: Sami Tolvanen samitolvanen@google.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- crypto/shash.c | 18 +++++++++++++++--- include/crypto/internal/hash.h | 8 +------- 2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/crypto/shash.c b/crypto/shash.c index 2e3433ad9762..0a0a50cb694f 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -20,12 +20,24 @@
static const struct crypto_type crypto_shash_type;
-int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } -EXPORT_SYMBOL_GPL(shash_no_setkey); + +/* + * Check whether an shash algorithm has a setkey function. + * + * For CFI compatibility, this must not be an inline function. This is because + * when CFI is enabled, modules won't get the same address for shash_no_setkey + * (if it were exported, which inlining would require) as the core kernel will. + */ +bool crypto_shash_alg_has_setkey(struct shash_alg *alg) +{ + return alg->setkey != shash_no_setkey; +} +EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey);
static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 0a288dddcf5b..25806141db59 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -75,13 +75,7 @@ void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst);
-int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen); - -static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) -{ - return alg->setkey != shash_no_setkey; -} +bool crypto_shash_alg_has_setkey(struct shash_alg *alg);
static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) {
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit abc0226df64dc137b48b911c1fe4319aec5891bb ]
The risk of especulation is actually almost-non-existing here, as there are very few users of TCP/IP using the DVB stack, as, this is mainly used with DVB-S/S2 cards, and only by people that receives TCP/IP from satellite connections, which limits a lot the number of users of such feature(*).
(*) In thesis, DVB-C cards could also benefit from it, but I'm yet to see a hardware that supports it.
Yet, fixing it is trivial.
Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/dvb-core/dvb_net.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 89620da983ba..dddebea644bb 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -45,6 +45,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/nospec.h> #include <linux/etherdevice.h> #include <linux/dvb/net.h> #include <linux/uio.h> @@ -1462,14 +1463,20 @@ static int dvb_net_do_ioctl(struct file *file, struct net_device *netdev; struct dvb_net_priv *priv_data; struct dvb_net_if *dvbnetif = parg; + int if_num = dvbnetif->if_num;
- if (dvbnetif->if_num >= DVB_NET_DEVICES_MAX || - !dvbnet->state[dvbnetif->if_num]) { + if (if_num >= DVB_NET_DEVICES_MAX) { ret = -EINVAL; goto ioctl_error; } + if_num = array_index_nospec(if_num, DVB_NET_DEVICES_MAX);
- netdev = dvbnet->device[dvbnetif->if_num]; + if (!dvbnet->state[if_num]) { + ret = -EINVAL; + goto ioctl_error; + } + + netdev = dvbnet->device[if_num];
priv_data = netdev_priv(netdev); dvbnetif->pid=priv_data->pid; @@ -1522,14 +1529,20 @@ static int dvb_net_do_ioctl(struct file *file, struct net_device *netdev; struct dvb_net_priv *priv_data; struct __dvb_net_if_old *dvbnetif = parg; + int if_num = dvbnetif->if_num; + + if (if_num >= DVB_NET_DEVICES_MAX) { + ret = -EINVAL; + goto ioctl_error; + } + if_num = array_index_nospec(if_num, DVB_NET_DEVICES_MAX);
- if (dvbnetif->if_num >= DVB_NET_DEVICES_MAX || - !dvbnet->state[dvbnetif->if_num]) { + if (!dvbnet->state[if_num]) { ret = -EINVAL; goto ioctl_error; }
- netdev = dvbnet->device[dvbnetif->if_num]; + netdev = dvbnet->device[if_num];
priv_data = netdev_priv(netdev); dvbnetif->pid=priv_data->pid;
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 1fec2ecc252301110e4149e6183fa70460d29674 ]
As reported by smatch:
drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:510 dvb_register_device() warn: '&dvbdev->list_head' not removed from list drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:530 dvb_register_device() warn: '&dvbdev->list_head' not removed from list drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:545 dvb_register_device() warn: '&dvbdev->list_head' not removed from list
The error logic inside dvb_register_device() doesn't remove devices from the dvb_adapter_list in case of errors.
Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/dvb-core/dvbdev.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 3862ddc86ec4..795d9bfaba5c 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -506,6 +506,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, break;
if (minor == MAX_DVB_MINORS) { + list_del (&dvbdev->list_head); kfree(dvbdevfops); kfree(dvbdev); up_write(&minor_rwsem); @@ -526,6 +527,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, __func__);
dvb_media_device_free(dvbdev); + list_del (&dvbdev->list_head); kfree(dvbdevfops); kfree(dvbdev); mutex_unlock(&dvbdev_register_lock); @@ -541,6 +543,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n", __func__, adap->num, dnames[type], id, PTR_ERR(clsdev)); dvb_media_device_free(dvbdev); + list_del (&dvbdev->list_head); kfree(dvbdevfops); kfree(dvbdev); return PTR_ERR(clsdev);
From: Mauro Carvalho Chehab mchehab+huawei@kernel.org
[ Upstream commit 5368b1ee2939961a16e74972b69088433fc52195 ]
As reported by smatch: drivers/media/common/siano/smsdvb-main.c:1231 smsdvb_hotplug() warn: '&client->entry' not removed from list
If an error occur at the end of the registration logic, it won't drop the device from the list.
Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/common/siano/smsdvb-main.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c index cd5bafe9a3ac..7e4100263381 100644 --- a/drivers/media/common/siano/smsdvb-main.c +++ b/drivers/media/common/siano/smsdvb-main.c @@ -1212,6 +1212,10 @@ static int smsdvb_hotplug(struct smscore_device_t *coredev, return 0;
media_graph_error: + mutex_lock(&g_smsdvb_clientslock); + list_del(&client->entry); + mutex_unlock(&g_smsdvb_clientslock); + smsdvb_debugfs_release(client);
client_error:
From: Steve Longerbeam slongerbeam@gmail.com
[ Upstream commit e198be37e52551bb863d07d2edc535d0932a3c4f ]
Some BT.656 sensors (e.g. ADV718x) transmit frames with unstable BT.656 sync codes after initial power on. This confuses the imx CSI,resulting in vertical and/or horizontal sync issues. Skip the first 20 frames to avoid the unstable sync codes.
[fabio: fixed checkpatch warning and increased the frame skipping to 20]
Signed-off-by: Steve Longerbeam slongerbeam@gmail.com Signed-off-by: Fabio Estevam festevam@gmail.com Reviewed-by: Tim Harvey tharvey@gateworks.com Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/media/imx/imx-media-csi.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c index e3bfd635a89a..6a94fff49bf6 100644 --- a/drivers/staging/media/imx/imx-media-csi.c +++ b/drivers/staging/media/imx/imx-media-csi.c @@ -750,9 +750,10 @@ static int csi_setup(struct csi_priv *priv)
static int csi_start(struct csi_priv *priv) { - struct v4l2_fract *output_fi; + struct v4l2_fract *input_fi, *output_fi; int ret;
+ input_fi = &priv->frame_interval[CSI_SINK_PAD]; output_fi = &priv->frame_interval[priv->active_output_pad];
/* start upstream */ @@ -761,6 +762,17 @@ static int csi_start(struct csi_priv *priv) if (ret) return ret;
+ /* Skip first few frames from a BT.656 source */ + if (priv->upstream_ep.bus_type == V4L2_MBUS_BT656) { + u32 delay_usec, bad_frames = 20; + + delay_usec = DIV_ROUND_UP_ULL((u64)USEC_PER_SEC * + input_fi->numerator * bad_frames, + input_fi->denominator); + + usleep_range(delay_usec, delay_usec + 1000); + } + if (priv->dest == IPU_CSI_DEST_IDMAC) { ret = csi_idmac_start(priv); if (ret)
From: Guenter Roeck linux@roeck-us.net
[ Upstream commit 897f6339893b741a5d68ae8e2475df65946041c2 ]
The MAX31790 has two sets of registers for pwm duty cycles, one to request a duty cycle and one to read the actual current duty cycle. Both do not have to be the same.
When reporting the pwm duty cycle to the user, the actual pwm duty cycle from pwm duty cycle registers needs to be reported. When setting it, the pwm target duty cycle needs to be written. Since we don't know the actual pwm duty cycle after a target pwm duty cycle has been written, set the valid flag to false to indicate that actual pwm duty cycle should be read from the chip instead of using cached values.
Cc: Jan Kundrát jan.kundrat@cesnet.cz Cc: Václav Kubernát kubernat@cesnet.cz Signed-off-by: Guenter Roeck linux@roeck-us.net Tested-by: Václav Kubernát kubernat@ceesnet.cz Reviewed-by: Jan Kundrát jan.kundrat@cesnet.cz Link: https://lore.kernel.org/r/20210526154022.3223012-3-linux@roeck-us.net Signed-off-by: Sasha Levin sashal@kernel.org --- Documentation/hwmon/max31790.rst | 3 ++- drivers/hwmon/max31790.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/Documentation/hwmon/max31790.rst b/Documentation/hwmon/max31790.rst index f301385d8cef..54ff0f49e28f 100644 --- a/Documentation/hwmon/max31790.rst +++ b/Documentation/hwmon/max31790.rst @@ -39,5 +39,6 @@ fan[1-12]_input RO fan tachometer speed in RPM fan[1-12]_fault RO fan experienced fault fan[1-6]_target RW desired fan speed in RPM pwm[1-6]_enable RW regulator mode, 0=disabled, 1=manual mode, 2=rpm mode -pwm[1-6] RW fan target duty cycle (0-255) +pwm[1-6] RW read: current pwm duty cycle, + write: target pwm duty cycle (0-255) ================== === ======================================================= diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c index 86e6c71db685..8ad7a45bfe68 100644 --- a/drivers/hwmon/max31790.c +++ b/drivers/hwmon/max31790.c @@ -104,7 +104,7 @@ static struct max31790_data *max31790_update_device(struct device *dev) data->tach[NR_CHANNEL + i] = rv; } else { rv = i2c_smbus_read_word_swapped(client, - MAX31790_REG_PWMOUT(i)); + MAX31790_REG_PWM_DUTY_CYCLE(i)); if (rv < 0) goto abort; data->pwm[i] = rv; @@ -299,10 +299,10 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel, err = -EINVAL; break; } - data->pwm[channel] = val << 8; + data->valid = false; err = i2c_smbus_write_word_swapped(client, MAX31790_REG_PWMOUT(channel), - data->pwm[channel]); + val << 8); break; case hwmon_pwm_enable: fan_config = data->fan_config[channel];
From: Guenter Roeck linux@roeck-us.net
[ Upstream commit 148c847c9e5a54b99850617bf9c143af9a344f92 ]
pwmX_enable supports three possible values:
0: Fan control disabled. Duty cycle is fixed to 0% 1: Fan control enabled, pwm mode. Duty cycle is determined by values written into Target Duty Cycle registers. 2: Fan control enabled, rpm mode Duty cycle is adjusted such that fan speed matches the values in Target Count registers
The current code does not do this; instead, it mixes pwm control configuration with fan speed monitoring configuration. Worse, it reports that pwm control would be disabled (pwmX_enable==0) when it is in fact enabled in pwm mode. Part of the problem may be that the chip sets the "TACH input enable" bit on its own whenever the mode bit is set to RPM mode, but that doesn't mean that "TACH input enable" accurately reflects the pwm mode.
Fix it up and only handle pwm control with the pwmX_enable attributes. In the documentation, clarify that disabling pwm control (pwmX_enable=0) sets the pwm duty cycle to 0%. In the code, explain why TACH_INPUT_EN is set together with RPM_MODE.
While at it, only update the configuration register if the configuration has changed, and only update the cached configuration if updating the chip configuration was successful.
Cc: Jan Kundrát jan.kundrat@cesnet.cz Cc: Václav Kubernát kubernat@cesnet.cz Signed-off-by: Guenter Roeck linux@roeck-us.net Tested-by: Václav Kubernát kubernat@cesnet.cz Reviewed-by: Jan Kundrát jan.kundrat@cesnet.cz Link: https://lore.kernel.org/r/20210526154022.3223012-4-linux@roeck-us.net Signed-off-by: Sasha Levin sashal@kernel.org --- Documentation/hwmon/max31790.rst | 2 +- drivers/hwmon/max31790.c | 41 ++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/Documentation/hwmon/max31790.rst b/Documentation/hwmon/max31790.rst index 54ff0f49e28f..7b097c3b9b90 100644 --- a/Documentation/hwmon/max31790.rst +++ b/Documentation/hwmon/max31790.rst @@ -38,7 +38,7 @@ Sysfs entries fan[1-12]_input RO fan tachometer speed in RPM fan[1-12]_fault RO fan experienced fault fan[1-6]_target RW desired fan speed in RPM -pwm[1-6]_enable RW regulator mode, 0=disabled, 1=manual mode, 2=rpm mode +pwm[1-6]_enable RW regulator mode, 0=disabled (duty cycle=0%), 1=manual mode, 2=rpm mode pwm[1-6] RW read: current pwm duty cycle, write: target pwm duty cycle (0-255) ================== === ======================================================= diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c index 8ad7a45bfe68..76aa96f5b984 100644 --- a/drivers/hwmon/max31790.c +++ b/drivers/hwmon/max31790.c @@ -27,6 +27,7 @@
/* Fan Config register bits */ #define MAX31790_FAN_CFG_RPM_MODE 0x80 +#define MAX31790_FAN_CFG_CTRL_MON 0x10 #define MAX31790_FAN_CFG_TACH_INPUT_EN 0x08 #define MAX31790_FAN_CFG_TACH_INPUT 0x01
@@ -271,12 +272,12 @@ static int max31790_read_pwm(struct device *dev, u32 attr, int channel, *val = data->pwm[channel] >> 8; return 0; case hwmon_pwm_enable: - if (fan_config & MAX31790_FAN_CFG_RPM_MODE) + if (fan_config & MAX31790_FAN_CFG_CTRL_MON) + *val = 0; + else if (fan_config & MAX31790_FAN_CFG_RPM_MODE) *val = 2; - else if (fan_config & MAX31790_FAN_CFG_TACH_INPUT_EN) - *val = 1; else - *val = 0; + *val = 1; return 0; default: return -EOPNOTSUPP; @@ -307,23 +308,33 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel, case hwmon_pwm_enable: fan_config = data->fan_config[channel]; if (val == 0) { - fan_config &= ~(MAX31790_FAN_CFG_TACH_INPUT_EN | - MAX31790_FAN_CFG_RPM_MODE); + fan_config |= MAX31790_FAN_CFG_CTRL_MON; + /* + * Disable RPM mode; otherwise disabling fan speed + * monitoring is not possible. + */ + fan_config &= ~MAX31790_FAN_CFG_RPM_MODE; } else if (val == 1) { - fan_config = (fan_config | - MAX31790_FAN_CFG_TACH_INPUT_EN) & - ~MAX31790_FAN_CFG_RPM_MODE; + fan_config &= ~(MAX31790_FAN_CFG_CTRL_MON | MAX31790_FAN_CFG_RPM_MODE); } else if (val == 2) { - fan_config |= MAX31790_FAN_CFG_TACH_INPUT_EN | - MAX31790_FAN_CFG_RPM_MODE; + fan_config &= ~MAX31790_FAN_CFG_CTRL_MON; + /* + * The chip sets MAX31790_FAN_CFG_TACH_INPUT_EN on its + * own if MAX31790_FAN_CFG_RPM_MODE is set. + * Do it here as well to reflect the actual register + * value in the cache. + */ + fan_config |= (MAX31790_FAN_CFG_RPM_MODE | MAX31790_FAN_CFG_TACH_INPUT_EN); } else { err = -EINVAL; break; } - data->fan_config[channel] = fan_config; - err = i2c_smbus_write_byte_data(client, - MAX31790_REG_FAN_CONFIG(channel), - fan_config); + if (fan_config != data->fan_config[channel]) { + err = i2c_smbus_write_byte_data(client, MAX31790_REG_FAN_CONFIG(channel), + fan_config); + if (!err) + data->fan_config[channel] = fan_config; + } break; default: err = -EOPNOTSUPP;
From: Lukasz Luba lukasz.luba@arm.com
[ Upstream commit 489f16459e0008c7a5c4c5af34bd80898aa82c2d ]
Energy Aware Scheduling (EAS) needs to be able to predict the frequency requests made by the SchedUtil governor to properly estimate energy used in the future. It has to take into account CPUs utilization and forecast Performance Domain (PD) frequency. There is a corner case when the max allowed frequency might be reduced due to thermal. SchedUtil is aware of that reduced frequency, so it should be taken into account also in EAS estimations.
SchedUtil, as a CPUFreq governor, knows the maximum allowed frequency of a CPU, thanks to cpufreq_driver_resolve_freq() and internal clamping to 'policy::max'. SchedUtil is responsible to respect that upper limit while setting the frequency through CPUFreq drivers. This effective frequency is stored internally in 'sugov_policy::next_freq' and EAS has to predict that value.
In the existing code the raw value of arch_scale_cpu_capacity() is used for clamping the returned CPU utilization from effective_cpu_util(). This patch fixes issue with too big single CPU utilization, by introducing clamping to the allowed CPU capacity. The allowed CPU capacity is a CPU capacity reduced by thermal pressure raw value.
Thanks to knowledge about allowed CPU capacity, we don't get too big value for a single CPU utilization, which is then added to the util sum. The util sum is used as a source of information for estimating whole PD energy. To avoid wrong energy estimation in EAS (due to capped frequency), make sure that the calculation of util sum is aware of allowed CPU capacity.
This thermal pressure might be visible in scenarios where the CPUs are not heavily loaded, but some other component (like GPU) drastically reduced available power budget and increased the SoC temperature. Thus, we still use EAS for task placement and CPUs are not over-utilized.
Signed-off-by: Lukasz Luba lukasz.luba@arm.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Vincent Guittot vincent.guittot@linaro.org Reviewed-by: Dietmar Eggemann dietmar.eggemann@arm.com Link: https://lore.kernel.org/r/20210614191128.22735-1-lukasz.luba@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/sched/fair.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 190ae8004a22..e807b743353d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6620,8 +6620,11 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) struct cpumask *pd_mask = perf_domain_span(pd); unsigned long cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask)); unsigned long max_util = 0, sum_util = 0; + unsigned long _cpu_cap = cpu_cap; int cpu;
+ _cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask)); + /* * The capacity state of CPUs of the current rd can be driven by CPUs * of another rd if they belong to the same pd. So, account for the @@ -6657,8 +6660,10 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) * is already enough to scale the EM reported power * consumption at the (eventually clamped) cpu_capacity. */ - sum_util += effective_cpu_util(cpu, util_running, cpu_cap, - ENERGY_UTIL, NULL); + cpu_util = effective_cpu_util(cpu, util_running, cpu_cap, + ENERGY_UTIL, NULL); + + sum_util += min(cpu_util, _cpu_cap);
/* * Performance domain frequency: utilization clamping @@ -6669,7 +6674,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) */ cpu_util = effective_cpu_util(cpu, util_freq, cpu_cap, FREQUENCY_UTIL, tsk); - max_util = max(max_util, cpu_util); + max_util = max(max_util, min(cpu_util, _cpu_cap)); }
return em_cpu_energy(pd->em_pd, max_util, sum_util);
From: Kan Liang kan.liang@linux.intel.com
[ Upstream commit 5471eea5d3bf850316f1064a6f57b34c444bce67 ]
The counter value of a perf task may leak to another RDPMC task. For example, a perf stat task as below is running on CPU 0.
perf stat -e 'branches,cycles' -- taskset -c 0 ./workload
In the meantime, an RDPMC task, which is also running on CPU 0, may read the GP counters periodically. (The RDPMC task creates a fixed event, but read four GP counters.)
$./rdpmc_read_all_counters index 0x0 value 0x8001e5970f99 index 0x1 value 0x8005d750edb6 index 0x2 value 0x0 index 0x3 value 0x0
index 0x0 value 0x8002358e48a5 index 0x1 value 0x8006bd1e3bc9 index 0x2 value 0x0 index 0x3 value 0x0
It is a potential security issue. Once the attacker knows what the other thread is counting. The PerfMon counter can be used as a side-channel to attack cryptosystems.
The counter value of the perf stat task leaks to the RDPMC task because perf never clears the counter when it's stopped.
Three methods were considered to address the issue.
- Unconditionally reset the counter in x86_pmu_del(). It can bring extra overhead even when there is no RDPMC task running.
- Only reset the un-assigned dirty counters when the RDPMC task is scheduled in via sched_task(). It fails for the below case.
Thread A Thread B
clone(CLONE_THREAD) ---> set_affine(0) set_affine(1) while (!event-enabled) ; event = perf_event_open() mmap(event) ioctl(event, IOC_ENABLE); ---> RDPMC
Counters are still leaked to the thread B.
- Only reset the un-assigned dirty counters before updating the CR4.PCE bit. The method is implemented here.
The dirty counter is a counter, on which the assigned event has been deleted, but the counter is not reset. To track the dirty counters, add a 'dirty' variable in the struct cpu_hw_events.
The security issue can only be found with an RDPMC task. To enable the RDMPC, the CR4.PCE bit has to be updated. Add a perf_clear_dirty_counters() right before updating the CR4.PCE bit to clear the existing dirty counters. Only the current un-assigned dirty counters are reset, because the RDPMC assigned dirty counters will be updated soon.
After applying the patch,
$ ./rdpmc_read_all_counters index 0x0 value 0x0 index 0x1 value 0x0 index 0x2 value 0x0 index 0x3 value 0x0
index 0x0 value 0x0 index 0x1 value 0x0 index 0x2 value 0x0 index 0x3 value 0x0
Performance
The performance of a context switch only be impacted when there are two or more perf users and one of the users must be an RDPMC user. In other cases, there is no performance impact.
The worst-case occurs when there are two users: the RDPMC user only uses one counter; while the other user uses all available counters. When the RDPMC task is scheduled in, all the counters, other than the RDPMC assigned one, have to be reset.
Test results for the worst-case, using a modified lat_ctx as measured on an Ice Lake platform, which has 8 GP and 3 FP counters (ignoring SLOTS).
lat_ctx -s 128K -N 1000 processes 2
Without the patch: The context switch time is 4.97 us
With the patch: The context switch time is 5.16 us
There is ~4% performance drop for the context switching time in the worst-case.
Suggested-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Kan Liang kan.liang@linux.intel.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/1623693582-187370-1-git-send-email-kan.liang@linux... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/events/core.c | 28 +++++++++++++++++++++++++++- arch/x86/events/perf_event.h | 1 + arch/x86/include/asm/perf_event.h | 1 + arch/x86/mm/tlb.c | 10 ++++++++-- 4 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8f71dd72ef95..1eb45139fcc6 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1626,6 +1626,8 @@ static void x86_pmu_del(struct perf_event *event, int flags) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto do_del;
+ __set_bit(event->hw.idx, cpuc->dirty); + /* * Not a TXN, therefore cleanup properly. */ @@ -2474,6 +2476,31 @@ static int x86_pmu_event_init(struct perf_event *event) return err; }
+void perf_clear_dirty_counters(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + /* Don't need to clear the assigned counter. */ + for (i = 0; i < cpuc->n_events; i++) + __clear_bit(cpuc->assign[i], cpuc->dirty); + + if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX)) + return; + + for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { + /* Metrics and fake events don't have corresponding HW counters. */ + if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR)) + continue; + else if (i >= INTEL_PMC_IDX_FIXED) + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); + else + wrmsrl(x86_pmu_event_addr(i), 0); + } + + bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); +} + static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) @@ -2497,7 +2524,6 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) { - if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ad87cb36f7c8..2bf1c7ea2758 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -229,6 +229,7 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled;
int n_events; /* the # of events in the below arrays */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 544f41a179fb..8fc1b5003713 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -478,6 +478,7 @@ struct x86_pmu_lbr {
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); +extern void perf_clear_dirty_counters(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 78804680e923..cfe6b1e85fa6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,6 +14,7 @@ #include <asm/nospec-branch.h> #include <asm/cache.h> #include <asm/apic.h> +#include <asm/perf_event.h>
#include "mm_internal.h"
@@ -404,9 +405,14 @@ static inline void cr4_update_pce_mm(struct mm_struct *mm) { if (static_branch_unlikely(&rdpmc_always_available_key) || (!static_branch_unlikely(&rdpmc_never_available_key) && - atomic_read(&mm->context.perf_rdpmc_allowed))) + atomic_read(&mm->context.perf_rdpmc_allowed))) { + /* + * Clear the existing dirty counters to + * prevent the leak for an RDPMC task. + */ + perf_clear_dirty_counters(); cr4_set_bits_irqsoff(X86_CR4_PCE); - else + } else cr4_clear_bits_irqsoff(X86_CR4_PCE); }
From: Jing Xiangfeng jingxiangfeng@huawei.com
[ Upstream commit d96b1b8c9f79b6bb234a31c80972a6f422079376 ]
ddr_perf_probe() misses to call ida_simple_remove() in an error path. Jump to cpuhp_state_err to fix it.
Signed-off-by: Jing Xiangfeng jingxiangfeng@huawei.com Reviewed-by: Dong Aisheng aisheng.dong@nxp.com Link: https://lore.kernel.org/r/20210617122614.166823-1-jingxiangfeng@huawei.com Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/perf/fsl_imx8_ddr_perf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 2bbb93188064..7b87aaf267d5 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -705,8 +705,10 @@ static int ddr_perf_probe(struct platform_device *pdev)
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", num); - if (!name) - return -ENOMEM; + if (!name) { + ret = -ENOMEM; + goto cpuhp_state_err; + }
pmu->devtype_data = of_device_get_match_data(&pdev->dev);
From: Marc Zyngier maz@kernel.org
[ Upstream commit d0c94c49792cf780cbfefe29f81bb8c3b73bc76b ]
Restoring a guest with an active virtual PMU results in no perf counters being instanciated on the host side. Not quite what you'd expect from a restore.
In order to fix this, force a writeback of PMCR_EL0 on the first run of a vcpu (using a new request so that it happens once the vcpu has been loaded). This will in turn create all the host-side counters that were missing.
Reported-by: Jinank Jain jinankj@amazon.de Tested-by: Jinank Jain jinankj@amazon.de Signed-off-by: Marc Zyngier maz@kernel.org Link: https://lore.kernel.org/r/87wnrbylxv.wl-maz@kernel.org Link: https://lore.kernel.org/r/b53dfcf9bbc4db7f96154b1cd5188d72b9766358.camel@ama... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 4 ++++ arch/arm64/kvm/pmu-emul.c | 3 +++ 3 files changed, 8 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cd7d5c8c4bc..6336b4309114 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e720148232a0..facf4d41d32a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -689,6 +689,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) vgic_v4_load(vcpu); preempt_enable(); } + + if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) + kvm_pmu_handle_pmcr(vcpu, + __vcpu_sys_reg(vcpu, PMCR_EL0)); } }
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fd167d4f4215..a0bbb7111f57 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -850,6 +850,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) return -EINVAL; }
+ /* One-off reload of the PMU on first run */ + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + return 0; }
From: Suraj Jitindar Singh sjitindarsingh@gmail.com
[ Upstream commit 77bbbc0cf84834ed130838f7ac1988567f4d0288 ]
The POWER9 vCPU TLB management code assumes all threads in a core share a TLB, and that TLBIEL execued by one thread will invalidate TLBs for all threads. This is not the case for SMT8 capable POWER9 and POWER10 (big core) processors, where the TLB is split between groups of threads. This results in TLB multi-hits, random data corruption, etc.
Fix this by introducing cpu_first_tlb_thread_sibling etc., to determine which siblings share TLBs, and use that in the guest TLB flushing code.
[npiggin@gmail.com: add changelog and comment]
Signed-off-by: Paul Mackerras paulus@ozlabs.org Signed-off-by: Nicholas Piggin npiggin@gmail.com Reviewed-by: Fabiano Rosas farosas@linux.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Link: https://lore.kernel.org/r/20210602040441.3984352-1-npiggin@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/include/asm/cputhreads.h | 30 +++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 13 ++++++------ arch/powerpc/kvm/book3s_hv_builtin.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- 4 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 98c8bd155bf9..b167186aaee4 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu) return cpu | (threads_per_core - 1); }
+/* + * tlb_thread_siblings are siblings which share a TLB. This is not + * architected, is not something a hypervisor could emulate and a future + * CPU may change behaviour even in compat mode, so this should only be + * used on PowerNV, and only with care. + */ +static inline int cpu_first_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu & ~0x6; /* Big Core */ + else + return cpu_first_thread_sibling(cpu); +} + +static inline int cpu_last_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu | 0x6; /* Big Core */ + else + return cpu_last_thread_sibling(cpu); +} + +static inline int cpu_tlb_thread_sibling_step(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return 2; /* Big Core */ + else + return 1; +} + static inline u32 get_tensr(void) { #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index bc0813644666..67cc164c4ac1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2657,7 +2657,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) cpumask_t *cpu_in_guest; int i;
- cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); if (nested) { cpumask_set_cpu(cpu, &nested->need_tlb_flush); cpu_in_guest = &nested->cpu_in_guest; @@ -2671,9 +2671,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) * the other side is the first smp_mb() in kvmppc_run_core(). */ smp_mb(); - for (i = 0; i < threads_per_core; ++i) - if (cpumask_test_cpu(cpu + i, cpu_in_guest)) - smp_call_function_single(cpu + i, do_nothing, NULL, 1); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + if (cpumask_test_cpu(i, cpu_in_guest)) + smp_call_function_single(i, do_nothing, NULL, 1); }
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) @@ -2704,8 +2705,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) */ if (prev_cpu != pcpu) { if (prev_cpu >= 0 && - cpu_first_thread_sibling(prev_cpu) != - cpu_first_thread_sibling(pcpu)) + cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) radix_flush_cpu(kvm, prev_cpu, vcpu); if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7a0e33a9c980..3edc25c89092 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -800,7 +800,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, * Thus we make all 4 threads use the same bit. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - pcpu = cpu_first_thread_sibling(pcpu); + pcpu = cpu_first_tlb_thread_sibling(pcpu);
if (nested) need_tlb_flush = &nested->need_tlb_flush; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 7a0f12404e0e..502d9ebe3ae4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -56,7 +56,7 @@ static int global_invalidates(struct kvm *kvm) * so use the bit for the first thread to represent the core. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); }
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit bb385bedded3ccbd794559600de4a09448810f4a ]
If we get an error while looking up the inode item we'll simply bail without cleaning up the delayed node. This results in this style of warning happening on commit:
WARNING: CPU: 0 PID: 76403 at fs/btrfs/delayed-inode.c:1365 btrfs_assert_delayed_root_empty+0x5b/0x90 CPU: 0 PID: 76403 Comm: fsstress Tainted: G W 5.13.0-rc1+ #373 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 RIP: 0010:btrfs_assert_delayed_root_empty+0x5b/0x90 RSP: 0018:ffffb8bb815a7e50 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff95d6d07e1888 RCX: ffff95d6c0fa3000 RDX: 0000000000000002 RSI: 000000000029e91c RDI: ffff95d6c0fc8060 RBP: ffff95d6c0fc8060 R08: 00008d6d701a2c1d R09: 0000000000000000 R10: ffff95d6d1760ea0 R11: 0000000000000001 R12: ffff95d6c15a4d00 R13: ffff95d6c0fa3000 R14: 0000000000000000 R15: ffffb8bb815a7e90 FS: 00007f490e8dbb80(0000) GS:ffff95d73bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6e75555cb0 CR3: 00000001101ce001 CR4: 0000000000370ef0 Call Trace: btrfs_commit_transaction+0x43c/0xb00 ? finish_wait+0x80/0x80 ? vfs_fsync_range+0x90/0x90 iterate_supers+0x8c/0x100 ksys_sync+0x50/0x90 __do_sys_sync+0xa/0x10 do_syscall_64+0x3d/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae
Because the iref isn't dropped and this leaves an elevated node->count, so any release just re-queues it onto the delayed inodes list. Fix this by going to the out label to handle the proper cleanup of the delayed node.
Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/delayed-inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 1a88f6214ebc..3091540fc22a 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1009,12 +1009,10 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, nofs_flag = memalloc_nofs_save(); ret = btrfs_lookup_inode(trans, root, path, &key, mod); memalloc_nofs_restore(nofs_flag); - if (ret > 0) { - btrfs_release_path(path); - return -ENOENT; - } else if (ret < 0) { - return ret; - } + if (ret > 0) + ret = -ENOENT; + if (ret < 0) + goto out;
leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0],
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 04587ad9bef6ce9d510325b4ba9852b6129eebdb ]
If we fail to update the delayed inode we need to abort the transaction, because we could leave an inode with the improper counts or some other such corruption behind.
Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/delayed-inode.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 3091540fc22a..3bb8b919d2c1 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1050,6 +1050,14 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0)); btrfs_release_delayed_inode(node);
+ /* + * If we fail to update the delayed inode we need to abort the + * transaction, because we could leave the inode with the improper + * counts behind. + */ + if (ret && ret != -ENOENT) + btrfs_abort_transaction(trans, ret); + return ret;
search:
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 5963ffcaf383134985a5a2d8a4baa582d3999e0a ]
While stress testing our error handling I noticed that sometimes we would still commit the transaction even though we had aborted the transaction.
Currently we track if a trans handle has dirtied any metadata, and if it hasn't we mark the filesystem as having an error (so no new transactions can be started), but we will allow the current transaction to complete as we do not mark the transaction itself as having been aborted.
This sounds good in theory, but we were not properly tracking IO errors in btrfs_finish_ordered_io, and thus committing the transaction with bogus free space data. This isn't necessarily a problem per-se with the free space cache, as the other guards in place would have kept us from accepting the free space cache as valid, but highlights a real world case where we had a bug and could have corrupted the filesystem because of it.
This "skip abort on empty trans handle" is nice in theory, but assumes we have perfect error handling everywhere, which we clearly do not. Also we do not allow further transactions to be started, so all this does is save the last transaction that was happening, which doesn't necessarily gain us anything other than the potential for real corruption.
Remove this particular bit of code, if we decide we need to abort the transaction then abort the current one and keep us from doing real harm to the file system, regardless of whether this specific trans handle dirtied anything or not.
Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.c | 5 +---- fs/btrfs/extent-tree.c | 1 - fs/btrfs/super.c | 11 ----------- fs/btrfs/transaction.c | 8 -------- fs/btrfs/transaction.h | 1 - 5 files changed, 1 insertion(+), 25 deletions(-)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a484fb72a01f..4bc3ca2cbd7d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -596,7 +596,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, fs_info->generation);
if (!should_cow_block(trans, root, buf)) { - trans->dirty = true; *cow_ret = buf; return 0; } @@ -1788,10 +1787,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) { - trans->dirty = true; + if (!should_cow_block(trans, root, b)) goto cow_done; - }
/* * must have write locks on this node and the diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3d5c35e4cb76..d2f39a122d89 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4784,7 +4784,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4a396c1147f1..bc613218c8c5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -299,17 +299,6 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = trans->fs_info;
WRITE_ONCE(trans->aborted, errno); - /* Nothing used. The other threads that have joined this - * transaction may be able to continue. */ - if (!trans->dirty && list_empty(&trans->new_bgs)) { - const char *errstr; - - errstr = btrfs_decode_error(errno); - btrfs_warn(fs_info, - "%s:%d: Aborting unused transaction(%s).", - function, line, errstr); - return; - } WRITE_ONCE(trans->transaction->aborted, errno); /* Wake up anybody who may be waiting on this transaction */ wake_up(&fs_info->transaction_wait); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f75de9f6c0ad..e0a82aa7da89 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2074,14 +2074,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
ASSERT(refcount_read(&trans->use_count) == 1);
- /* - * Some places just start a transaction to commit it. We need to make - * sure that if this commit fails that the abort code actually marks the - * transaction as failed, so set trans->dirty to make the abort code do - * the right thing. - */ - trans->dirty = true; - /* Stop the commit early if ->aborted is set */ if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 364cfbb4c5c5..c49e2266b28b 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -143,7 +143,6 @@ struct btrfs_trans_handle { bool allocating_chunk; bool can_flush_pending_bgs; bool reloc_reserved; - bool dirty; bool in_fsync; struct btrfs_root *root; struct btrfs_fs_info *fs_info;
From: David Sterba dsterba@suse.com
[ Upstream commit 8c5ec995616f1202ab92e195fd75d6f60d86f85c ]
The type of discard_bitmap_bytes and discard_extent_bytes is u64 so the format should be %llu, though the actual values would hardly ever overflow to negative values.
Reviewed-by: Qu Wenruo wqu@suse.com Reviewed-by: Anand Jain anand.jain@oracle.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 436ac7b4b334..4f5b14cd3a19 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -429,7 +429,7 @@ static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
- return scnprintf(buf, PAGE_SIZE, "%lld\n", + return scnprintf(buf, PAGE_SIZE, "%llu\n", fs_info->discard_ctl.discard_bitmap_bytes); } BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show); @@ -451,7 +451,7 @@ static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
- return scnprintf(buf, PAGE_SIZE, "%lld\n", + return scnprintf(buf, PAGE_SIZE, "%llu\n", fs_info->discard_ctl.discard_extent_bytes); } BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
From: Qu Wenruo wqu@suse.com
[ Upstream commit 8df507cbb5952719353c912a021b66c27641e90c ]
[BUG] For the following file layout, scrub will not be able to repair all these two repairable error, but in fact make one corruption even unrepairable:
inode offset 0 4k 8K Mirror 1 |XXXXXX| | Mirror 2 | |XXXXXX|
[CAUSE] The root cause is the hard coded PAGE_SIZE, which makes scrub repair to go crazy for subpage.
For above case, when reading the first sector, we use PAGE_SIZE other than sectorsize to read, which makes us to read the full range [0, 64K). In fact, after 8K there may be no data at all, we can just get some garbage.
Then when doing the repair, we also writeback a full page from mirror 2, this means, we will also writeback the corrupted data in mirror 2 back to mirror 1, leaving the range [4K, 8K) unrepairable.
[FIX] This patch will modify the following PAGE_SIZE use with sectorsize:
- scrub_print_warning_inode() Remove the min() and replace PAGE_SIZE with sectorsize. The min() makes no sense, as csum is done for the full sector with padding.
This fixes a bug that subpage report extra length like: checksum error at logical 298844160 on dev /dev/mapper/arm_nvme-test, physical 575668224, root 5, inode 257, offset 0, length 12288, links 1 (path: file)
Where the error is only 1 sector.
- scrub_handle_errored_block() Comments with PAGE|page involved, all changed to sector.
- scrub_setup_recheck_block() - scrub_repair_page_from_good_copy() - scrub_add_page_to_wr_bio() - scrub_wr_submit() - scrub_add_page_to_rd_bio() - scrub_block_complete() Replace PAGE_SIZE with sectorsize. This solves several problems where we read/write extra range for subpage case.
RAID56 code is excluded intentionally, as RAID56 has extra PAGE_SIZE usage, and is not really safe enough. Thus we will reject RAID56 for subpage in later commit.
Signed-off-by: Qu Wenruo wqu@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/scrub.c | 82 +++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 40 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 485cda3eb8d7..6af901539ef2 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -626,7 +626,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx( static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *warn_ctx) { - u64 isize; u32 nlink; int ret; int i; @@ -662,7 +661,6 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, eb = swarn->path->nodes[0]; inode_item = btrfs_item_ptr(eb, swarn->path->slots[0], struct btrfs_inode_item); - isize = btrfs_inode_size(eb, inode_item); nlink = btrfs_inode_nlink(eb, inode_item); btrfs_release_path(swarn->path);
@@ -691,12 +689,12 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)", +"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)", swarn->errstr, swarn->logical, rcu_str_deref(swarn->dev->name), swarn->physical, root, inum, offset, - min(isize - offset, (u64)PAGE_SIZE), nlink, + fs_info->sectorsize, nlink, (char *)(unsigned long)ipath->fspath->val[i]);
btrfs_put_root(local_root); @@ -885,25 +883,25 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) * read all mirrors one after the other. This includes to * re-read the extent or metadata block that failed (that was * the cause that this fixup code is called) another time, - * page by page this time in order to know which pages + * sector by sector this time in order to know which sectors * caused I/O errors and which ones are good (for all mirrors). * It is the goal to handle the situation when more than one * mirror contains I/O errors, but the errors do not * overlap, i.e. the data can be repaired by selecting the - * pages from those mirrors without I/O error on the - * particular pages. One example (with blocks >= 2 * PAGE_SIZE) - * would be that mirror #1 has an I/O error on the first page, - * the second page is good, and mirror #2 has an I/O error on - * the second page, but the first page is good. - * Then the first page of the first mirror can be repaired by - * taking the first page of the second mirror, and the - * second page of the second mirror can be repaired by - * copying the contents of the 2nd page of the 1st mirror. - * One more note: if the pages of one mirror contain I/O + * sectors from those mirrors without I/O error on the + * particular sectors. One example (with blocks >= 2 * sectorsize) + * would be that mirror #1 has an I/O error on the first sector, + * the second sector is good, and mirror #2 has an I/O error on + * the second sector, but the first sector is good. + * Then the first sector of the first mirror can be repaired by + * taking the first sector of the second mirror, and the + * second sector of the second mirror can be repaired by + * copying the contents of the 2nd sector of the 1st mirror. + * One more note: if the sectors of one mirror contain I/O * errors, the checksum cannot be verified. In order to get * the best data for repairing, the first attempt is to find * a mirror without I/O errors and with a validated checksum. - * Only if this is not possible, the pages are picked from + * Only if this is not possible, the sectors are picked from * mirrors with I/O errors without considering the checksum. * If the latter is the case, at the end, the checksum of the * repaired area is verified in order to correctly maintain @@ -1060,26 +1058,26 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
/* * In case of I/O errors in the area that is supposed to be - * repaired, continue by picking good copies of those pages. - * Select the good pages from mirrors to rewrite bad pages from + * repaired, continue by picking good copies of those sectors. + * Select the good sectors from mirrors to rewrite bad sectors from * the area to fix. Afterwards verify the checksum of the block * that is supposed to be repaired. This verification step is * only done for the purpose of statistic counting and for the * final scrub report, whether errors remain. * A perfect algorithm could make use of the checksum and try - * all possible combinations of pages from the different mirrors + * all possible combinations of sectors from the different mirrors * until the checksum verification succeeds. For example, when - * the 2nd page of mirror #1 faces I/O errors, and the 2nd page + * the 2nd sector of mirror #1 faces I/O errors, and the 2nd sector * of mirror #2 is readable but the final checksum test fails, - * then the 2nd page of mirror #3 could be tried, whether now + * then the 2nd sector of mirror #3 could be tried, whether now * the final checksum succeeds. But this would be a rare * exception and is therefore not implemented. At least it is * avoided that the good copy is overwritten. * A more useful improvement would be to pick the sectors * without I/O error based on sector sizes (512 bytes on legacy - * disks) instead of on PAGE_SIZE. Then maybe 512 byte of one + * disks) instead of on sectorsize. Then maybe 512 byte of one * mirror could be repaired by taking 512 byte of a different - * mirror, even if other 512 byte sectors in the same PAGE_SIZE + * mirror, even if other 512 byte sectors in the same sectorsize * area are unreadable. */ success = 1; @@ -1260,7 +1258,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, { struct scrub_ctx *sctx = original_sblock->sctx; struct btrfs_fs_info *fs_info = sctx->fs_info; - u64 length = original_sblock->page_count * PAGE_SIZE; + u64 length = original_sblock->page_count * fs_info->sectorsize; u64 logical = original_sblock->pagev[0]->logical; u64 generation = original_sblock->pagev[0]->generation; u64 flags = original_sblock->pagev[0]->flags; @@ -1283,13 +1281,13 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, */
while (length > 0) { - sublen = min_t(u64, length, PAGE_SIZE); + sublen = min_t(u64, length, fs_info->sectorsize); mapped_length = sublen; bbio = NULL;
/* - * with a length of PAGE_SIZE, each returned stripe - * represents one mirror + * With a length of sectorsize, each returned stripe represents + * one mirror */ btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, @@ -1480,7 +1478,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, bio = btrfs_io_bio_alloc(1); bio_set_dev(bio, spage->dev->bdev);
- bio_add_page(bio, spage->page, PAGE_SIZE, 0); + bio_add_page(bio, spage->page, fs_info->sectorsize, 0); bio->bi_iter.bi_sector = spage->physical >> 9; bio->bi_opf = REQ_OP_READ;
@@ -1544,6 +1542,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, struct scrub_page *spage_bad = sblock_bad->pagev[page_num]; struct scrub_page *spage_good = sblock_good->pagev[page_num]; struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info; + const u32 sectorsize = fs_info->sectorsize;
BUG_ON(spage_bad->page == NULL); BUG_ON(spage_good->page == NULL); @@ -1563,8 +1562,8 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, bio->bi_iter.bi_sector = spage_bad->physical >> 9; bio->bi_opf = REQ_OP_WRITE;
- ret = bio_add_page(bio, spage_good->page, PAGE_SIZE, 0); - if (PAGE_SIZE != ret) { + ret = bio_add_page(bio, spage_good->page, sectorsize, 0); + if (ret != sectorsize) { bio_put(bio); return -EIO; } @@ -1642,6 +1641,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, { struct scrub_bio *sbio; int ret; + const u32 sectorsize = sctx->fs_info->sectorsize;
mutex_lock(&sctx->wr_lock); again: @@ -1681,16 +1681,16 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, bio->bi_iter.bi_sector = sbio->physical >> 9; bio->bi_opf = REQ_OP_WRITE; sbio->status = 0; - } else if (sbio->physical + sbio->page_count * PAGE_SIZE != + } else if (sbio->physical + sbio->page_count * sectorsize != spage->physical_for_dev_replace || - sbio->logical + sbio->page_count * PAGE_SIZE != + sbio->logical + sbio->page_count * sectorsize != spage->logical) { scrub_wr_submit(sctx); goto again; }
- ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0); - if (ret != PAGE_SIZE) { + ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0); + if (ret != sectorsize) { if (sbio->page_count < 1) { bio_put(sbio->bio); sbio->bio = NULL; @@ -1729,7 +1729,8 @@ static void scrub_wr_submit(struct scrub_ctx *sctx) btrfsic_submit_bio(sbio->bio);
if (btrfs_is_zoned(sctx->fs_info)) - sctx->write_pointer = sbio->physical + sbio->page_count * PAGE_SIZE; + sctx->write_pointer = sbio->physical + sbio->page_count * + sctx->fs_info->sectorsize; }
static void scrub_wr_bio_end_io(struct bio *bio) @@ -2006,6 +2007,7 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, { struct scrub_block *sblock = spage->sblock; struct scrub_bio *sbio; + const u32 sectorsize = sctx->fs_info->sectorsize; int ret;
again: @@ -2044,9 +2046,9 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, bio->bi_iter.bi_sector = sbio->physical >> 9; bio->bi_opf = REQ_OP_READ; sbio->status = 0; - } else if (sbio->physical + sbio->page_count * PAGE_SIZE != + } else if (sbio->physical + sbio->page_count * sectorsize != spage->physical || - sbio->logical + sbio->page_count * PAGE_SIZE != + sbio->logical + sbio->page_count * sectorsize != spage->logical || sbio->dev != spage->dev) { scrub_submit(sctx); @@ -2054,8 +2056,8 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, }
sbio->pagev[sbio->page_count] = spage; - ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0); - if (ret != PAGE_SIZE) { + ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0); + if (ret != sectorsize) { if (sbio->page_count < 1) { bio_put(sbio->bio); sbio->bio = NULL; @@ -2398,7 +2400,7 @@ static void scrub_block_complete(struct scrub_block *sblock) if (sblock->sparity && corrupted && !sblock->data_corrected) { u64 start = sblock->pagev[0]->logical; u64 end = sblock->pagev[sblock->page_count - 1]->logical + - PAGE_SIZE; + sblock->sctx->fs_info->sectorsize;
ASSERT(end - start <= U32_MAX); scrub_parity_mark_sectors_error(sblock->sparity,
From: Qu Wenruo wqu@suse.com
[ Upstream commit 87b4d86baae219a9a79f6b0a1434b2a42fd40d09 ]
Currently we use page Private2 bit to indicate that we have ordered extent for the page range.
But the lifespan of it is not consistent, during regular writeback path, there are two locations to clear the same PagePrivate2:
T ----- Page marked Dirty | + ----- Page marked Private2, through btrfs_run_dealloc_range() | + ----- Page cleared Private2, through btrfs_writepage_cow_fixup() | in __extent_writepage_io() | ^^^ Private2 cleared for the first time | + ----- Page marked Writeback, through btrfs_set_range_writeback() | in __extent_writepage_io(). | + ----- Page cleared Private2, through | btrfs_writepage_endio_finish_ordered() | ^^^ Private2 cleared for the second time. | + ----- Page cleared Writeback, through btrfs_writepage_endio_finish_ordered()
Currently PagePrivate2 is mostly to prevent ordered extent accounting being executed for both endio and invalidatepage. Thus only the one who cleared page Private2 is responsible for ordered extent accounting.
But the fact is, in btrfs_writepage_endio_finish_ordered(), page Private2 is cleared and ordered extent accounting is executed unconditionally.
The race prevention only happens through btrfs_invalidatepage(), where we wait for the page writeback first, before checking the Private2 bit.
This means, Private2 is also protected by Writeback bit, and there is no need for btrfs_writepage_cow_fixup() to clear Priavte2.
This patch will change btrfs_writepage_cow_fixup() to just check PagePrivate2, not to clear it. The clearing will happen in either btrfs_invalidatepage() or btrfs_writepage_endio_finish_ordered().
This makes the Private2 bit easier to understand, just meaning the page has unfinished ordered extent attached to it.
And this patch is a hard requirement for the incoming refactoring for how we finished ordered IO for endio context, as the coming patch will check Private2 to determine if we need to do the ordered extent accounting. Thus this patch is definitely needed or we will hang due to unfinished ordered extent.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Qu Wenruo wqu@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 46f392943f4d..f9942560866b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2677,7 +2677,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end) struct btrfs_writepage_fixup *fixup;
/* this page is properly in the ordered list */ - if (TestClearPagePrivate2(page)) + if (PagePrivate2(page)) return 0;
/*
On Sun, Jul 04, 2021 at 07:04:10PM -0400, Sasha Levin wrote:
From: Qu Wenruo wqu@suse.com
[ Upstream commit 87b4d86baae219a9a79f6b0a1434b2a42fd40d09 ]
Currently we use page Private2 bit to indicate that we have ordered extent for the page range.
But the lifespan of it is not consistent, during regular writeback path, there are two locations to clear the same PagePrivate2:
T ----- Page marked Dirty | + ----- Page marked Private2, through btrfs_run_dealloc_range() | + ----- Page cleared Private2, through btrfs_writepage_cow_fixup() | in __extent_writepage_io() | ^^^ Private2 cleared for the first time | + ----- Page marked Writeback, through btrfs_set_range_writeback() | in __extent_writepage_io(). | + ----- Page cleared Private2, through | btrfs_writepage_endio_finish_ordered() | ^^^ Private2 cleared for the second time. | + ----- Page cleared Writeback, through btrfs_writepage_endio_finish_ordered()
Currently PagePrivate2 is mostly to prevent ordered extent accounting being executed for both endio and invalidatepage. Thus only the one who cleared page Private2 is responsible for ordered extent accounting.
But the fact is, in btrfs_writepage_endio_finish_ordered(), page Private2 is cleared and ordered extent accounting is executed unconditionally.
The race prevention only happens through btrfs_invalidatepage(), where we wait for the page writeback first, before checking the Private2 bit.
This means, Private2 is also protected by Writeback bit, and there is no need for btrfs_writepage_cow_fixup() to clear Priavte2.
This patch will change btrfs_writepage_cow_fixup() to just check PagePrivate2, not to clear it. The clearing will happen in either btrfs_invalidatepage() or btrfs_writepage_endio_finish_ordered().
This makes the Private2 bit easier to understand, just meaning the page has unfinished ordered extent attached to it.
And this patch is a hard requirement for the incoming refactoring for how we finished ordered IO for endio context, as the coming patch will check Private2 to determine if we need to do the ordered extent accounting. Thus this patch is definitely needed or we will hang due to unfinished ordered extent.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Qu Wenruo wqu@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
Please drop this patch from all autosel stable backports. This is not a standalone fix and the CC: stable@ is not there intentionally.
All patches that go through my tree are evaluated for stable backports up to 4.4 so it's unlikely the machinery you're using can find something I've overlooked.
The patches that autosel picks and do not get a complain^Wreply for me are below the bar I'd consider it for stable but if after another review the patch "does no harm", I let it pass because you're obviously handling the backports. I would not tag it myself to avoid increasing load of Greg with patches that don't matter much.
On Wed, Jul 07, 2021 at 01:10:05PM +0200, David Sterba wrote:
On Sun, Jul 04, 2021 at 07:04:10PM -0400, Sasha Levin wrote:
From: Qu Wenruo wqu@suse.com
[ Upstream commit 87b4d86baae219a9a79f6b0a1434b2a42fd40d09 ]
Currently we use page Private2 bit to indicate that we have ordered extent for the page range.
But the lifespan of it is not consistent, during regular writeback path, there are two locations to clear the same PagePrivate2:
T ----- Page marked Dirty | + ----- Page marked Private2, through btrfs_run_dealloc_range() | + ----- Page cleared Private2, through btrfs_writepage_cow_fixup() | in __extent_writepage_io() | ^^^ Private2 cleared for the first time | + ----- Page marked Writeback, through btrfs_set_range_writeback() | in __extent_writepage_io(). | + ----- Page cleared Private2, through | btrfs_writepage_endio_finish_ordered() | ^^^ Private2 cleared for the second time. | + ----- Page cleared Writeback, through btrfs_writepage_endio_finish_ordered()
Currently PagePrivate2 is mostly to prevent ordered extent accounting being executed for both endio and invalidatepage. Thus only the one who cleared page Private2 is responsible for ordered extent accounting.
But the fact is, in btrfs_writepage_endio_finish_ordered(), page Private2 is cleared and ordered extent accounting is executed unconditionally.
The race prevention only happens through btrfs_invalidatepage(), where we wait for the page writeback first, before checking the Private2 bit.
This means, Private2 is also protected by Writeback bit, and there is no need for btrfs_writepage_cow_fixup() to clear Priavte2.
This patch will change btrfs_writepage_cow_fixup() to just check PagePrivate2, not to clear it. The clearing will happen in either btrfs_invalidatepage() or btrfs_writepage_endio_finish_ordered().
This makes the Private2 bit easier to understand, just meaning the page has unfinished ordered extent attached to it.
And this patch is a hard requirement for the incoming refactoring for how we finished ordered IO for endio context, as the coming patch will check Private2 to determine if we need to do the ordered extent accounting. Thus this patch is definitely needed or we will hang due to unfinished ordered extent.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Qu Wenruo wqu@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
Please drop this patch from all autosel stable backports. This is not a standalone fix and the CC: stable@ is not there intentionally.
Will do.
All patches that go through my tree are evaluated for stable backports up to 4.4 so it's unlikely the machinery you're using can find something I've overlooked.
If you'd like, I can make it ignore fs/btrfs/. The tool is there to help maintainers who aren't as diligent w.r.t tagging patches for stable, not to create extra noise for something.
We can ofcourse also keep the current workflow if you think that it's helpful in some way.
The patches that autosel picks and do not get a complain^Wreply for me are below the bar I'd consider it for stable but if after another review the patch "does no harm", I let it pass because you're obviously handling the backports. I would not tag it myself to avoid increasing load of Greg with patches that don't matter much.
I'd say don't worry about this side of things: we're trying to get any fixes in, without too much regard to whether the fix is for something "big" or "small". We'd prefer to go over extra patches here in exchange for a much better user experience :)
From: Qu Wenruo wqu@suse.com
[ Upstream commit 0528476b6ac7832f31e2ed740a57ae31316b124e ]
[BUG] With current subpage RW support, the following script can hang the fs with 64K page size.
# mkfs.btrfs -f -s 4k $dev # mount $dev -o nospace_cache $mnt # fsstress -w -n 50 -p 1 -s 1607749395 -d $mnt
The kernel will do an infinite loop in btrfs_punch_hole_lock_range().
[CAUSE] In btrfs_punch_hole_lock_range() we:
- Truncate page cache range - Lock extent io tree - Wait any ordered extents in the range.
We exit the loop until we meet all the following conditions:
- No ordered extent in the lock range - No page is in the lock range
The latter condition has a pitfall, it only works for sector size == PAGE_SIZE case.
While can't handle the following subpage case:
0 32K 64K 96K 128K | |///////||//////| ||
lockstart=32K lockend=96K - 1
In this case, although the range crosses 2 pages, truncate_pagecache_range() will invalidate no page at all, but only zero the [32K, 96K) range of the two pages.
Thus filemap_range_has_page(32K, 96K-1) will always return true, thus we will never meet the loop exit condition.
[FIX] Fix the problem by doing page alignment for the lock range.
Function filemap_range_has_page() has already handled lend < lstart case, we only need to round up @lockstart, and round_down @lockend for truncate_pagecache_range().
This modification should not change any thing for sector size == PAGE_SIZE case, as in that case our range is already page aligned.
Tested-by: Ritesh Harjani riteshh@linux.ibm.com # [ppc64] Tested-by: Anand Jain anand.jain@oracle.com # [aarch64] Signed-off-by: Qu Wenruo wqu@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 55f68422061d..7e6ed97a6d08 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2483,6 +2483,17 @@ static int btrfs_punch_hole_lock_range(struct inode *inode, const u64 lockend, struct extent_state **cached_state) { + /* + * For subpage case, if the range is not at page boundary, we could + * have pages at the leading/tailing part of the range. + * This could lead to dead loop since filemap_range_has_page() + * will always return true. + * So here we need to do extra page alignment for + * filemap_range_has_page(). + */ + const u64 page_lockstart = round_up(lockstart, PAGE_SIZE); + const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1; + while (1) { struct btrfs_ordered_extent *ordered; int ret; @@ -2503,7 +2514,7 @@ static int btrfs_punch_hole_lock_range(struct inode *inode, (ordered->file_offset + ordered->num_bytes <= lockstart || ordered->file_offset > lockend)) && !filemap_range_has_page(inode->i_mapping, - lockstart, lockend)) { + page_lockstart, page_lockend)) { if (ordered) btrfs_put_ordered_extent(ordered); break;
From: Qu Wenruo wqu@suse.com
[ Upstream commit bcd77455d590eaa0422a5e84ae852007cfce574a ]
[BUG] With current btrfs subpage rw support, the following script can lead to fs hang:
$ mkfs.btrfs -f -s 4k $dev $ mount $dev -o nospace_cache $mnt $ fsstress -w -n 100 -p 1 -s 1608140256 -v -d $mnt
The fs will hang at btrfs_start_ordered_extent().
[CAUSE] In above test case, btrfs_invalidate() will be called with the following parameters:
offset = 0 length = 53248 page dirty = 1 subpage dirty bitmap = 0x2000
Since @offset is 0, btrfs_invalidate() will try to invalidate the full page, and finally call clear_page_extent_mapped() which will detach subpage structure from the page.
And since the page no longer has subpage structure, the subpage dirty bitmap will be cleared, preventing the dirty range from being written back, thus no way to wake up the ordered extent.
[FIX] Just follow other filesystems, only to invalidate the page if the range covers the full page.
There are cases like truncate_setsize() which can call btrfs_invalidatepage() with offset == 0 and length != 0 for the last page of an inode.
Although the old code will still try to invalidate the full page, we are still safe to just wait for ordered extent to finish. So it shouldn't cause extra problems.
Tested-by: Ritesh Harjani riteshh@linux.ibm.com # [ppc64] Tested-by: Anand Jain anand.jain@oracle.com # [aarch64] Signed-off-by: Qu Wenruo wqu@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9942560866b..5d147f204fca 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8390,7 +8390,19 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, */ wait_on_page_writeback(page);
- if (offset) { + /* + * For subpage case, we have call sites like + * btrfs_punch_hole_lock_range() which passes range not aligned to + * sectorsize. + * If the range doesn't cover the full page, we don't need to and + * shouldn't clear page extent mapped, as page->private can still + * record subpage dirty bits for other part of the range. + * + * For cases that can invalidate the full even the range doesn't + * cover the full page, like invalidating the last page, we're + * still safe to wait for ordered extent to finish. + */ + if (!(offset == 0 && length == PAGE_SIZE)) { btrfs_releasepage(page, GFP_NOFS); return; }
From: Christophe Leroy christophe.leroy@csgroup.eu
[ Upstream commit b05fbcc36be1f8597a1febef4892053a0b2f3f60 ]
With a config having PAGE_SIZE set to 256K, BTRFS build fails with the following message
include/linux/compiler_types.h:326:38: error: call to '__compiletime_assert_791' declared with attribute error: BUILD_BUG_ON failed: (BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0
BTRFS_MAX_COMPRESSED being 128K, BTRFS cannot support platforms with 256K pages at the time being.
There are two platforms that can select 256K pages: - hexagon - powerpc
Disable BTRFS when 256K page size is selected. Supporting this would require changes to the subpage mode that's currently being developed. Given that 256K is many times larger than page sizes commonly used and for what the algorithms and structures have been tuned, it's out of scope and disabling build is a reasonable option.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Christophe Leroy christophe.leroy@csgroup.eu [ update changelog ] Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/Kconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 68b95ad82126..520a0f6a7d9e 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -18,6 +18,8 @@ config BTRFS_FS select RAID6_PQ select XOR_BLOCKS select SRCU + depends on !PPC_256K_PAGES # powerpc + depends on !PAGE_SIZE_256KB # hexagon
help Btrfs is a general purpose copy-on-write filesystem with extents,
From: Boqun Feng boqun.feng@gmail.com
[ Upstream commit 69c7a5fb2482636f525f016c8333fdb9111ecb9d ]
We use the same code to print backwards lock dependency path as the forwards lock dependency path, and this could result into incorrect printing because for a backwards lock_list ->trace is not the call trace where the lock of ->class is acquired.
Fix this by introducing a separate function on printing the backwards dependency path. Also add a few comments about the printing while we are at it.
Reported-by: Johannes Berg johannes@sipsolutions.net Signed-off-by: Boqun Feng boqun.feng@gmail.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lore.kernel.org/r/20210618170110.3699115-2-boqun.feng@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/locking/lockdep.c | 108 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 2 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e32313072506..e51fa6575c39 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2306,7 +2306,56 @@ static void print_lock_class_header(struct lock_class *class, int depth) }
/* - * printk the shortest lock dependencies from @start to @end in reverse order: + * Dependency path printing: + * + * After BFS we get a lock dependency path (linked via ->parent of lock_list), + * printing out each lock in the dependency path will help on understanding how + * the deadlock could happen. Here are some details about dependency path + * printing: + * + * 1) A lock_list can be either forwards or backwards for a lock dependency, + * for a lock dependency A -> B, there are two lock_lists: + * + * a) lock_list in the ->locks_after list of A, whose ->class is B and + * ->links_to is A. In this case, we can say the lock_list is + * "A -> B" (forwards case). + * + * b) lock_list in the ->locks_before list of B, whose ->class is A + * and ->links_to is B. In this case, we can say the lock_list is + * "B <- A" (bacwards case). + * + * The ->trace of both a) and b) point to the call trace where B was + * acquired with A held. + * + * 2) A "helper" lock_list is introduced during BFS, this lock_list doesn't + * represent a certain lock dependency, it only provides an initial entry + * for BFS. For example, BFS may introduce a "helper" lock_list whose + * ->class is A, as a result BFS will search all dependencies starting with + * A, e.g. A -> B or A -> C. + * + * The notation of a forwards helper lock_list is like "-> A", which means + * we should search the forwards dependencies starting with "A", e.g A -> B + * or A -> C. + * + * The notation of a bacwards helper lock_list is like "<- B", which means + * we should search the backwards dependencies ending with "B", e.g. + * B <- A or B <- C. + */ + +/* + * printk the shortest lock dependencies from @root to @leaf in reverse order. + * + * We have a lock dependency path as follow: + * + * @root @leaf + * | | + * V V + * ->parent ->parent + * | lock_list | <--------- | lock_list | ... | lock_list | <--------- | lock_list | + * | -> L1 | | L1 -> L2 | ... |Ln-2 -> Ln-1| | Ln-1 -> Ln| + * + * , so it's natural that we start from @leaf and print every ->class and + * ->trace until we reach the @root. */ static void __used print_shortest_lock_dependencies(struct lock_list *leaf, @@ -2334,6 +2383,61 @@ print_shortest_lock_dependencies(struct lock_list *leaf, } while (entry && (depth >= 0)); }
+/* + * printk the shortest lock dependencies from @leaf to @root. + * + * We have a lock dependency path (from a backwards search) as follow: + * + * @leaf @root + * | | + * V V + * ->parent ->parent + * | lock_list | ---------> | lock_list | ... | lock_list | ---------> | lock_list | + * | L2 <- L1 | | L3 <- L2 | ... | Ln <- Ln-1 | | <- Ln | + * + * , so when we iterate from @leaf to @root, we actually print the lock + * dependency path L1 -> L2 -> .. -> Ln in the non-reverse order. + * + * Another thing to notice here is that ->class of L2 <- L1 is L1, while the + * ->trace of L2 <- L1 is the call trace of L2, in fact we don't have the call + * trace of L1 in the dependency path, which is alright, because most of the + * time we can figure out where L1 is held from the call trace of L2. + */ +static void __used +print_shortest_lock_dependencies_backwards(struct lock_list *leaf, + struct lock_list *root) +{ + struct lock_list *entry = leaf; + const struct lock_trace *trace = NULL; + int depth; + + /*compute depth from generated tree by BFS*/ + depth = get_lock_depth(leaf); + + do { + print_lock_class_header(entry->class, depth); + if (trace) { + printk("%*s ... acquired at:\n", depth, ""); + print_lock_trace(trace, 2); + printk("\n"); + } + + /* + * Record the pointer to the trace for the next lock_list + * entry, see the comments for the function. + */ + trace = entry->trace; + + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad path found in chain graph\n", __func__); + break; + } + + entry = get_lock_parent(entry); + depth--; + } while (entry && (depth >= 0)); +} + static void print_irq_lock_scenario(struct lock_list *safe_entry, struct lock_list *unsafe_entry, @@ -2451,7 +2555,7 @@ print_bad_irq_dependency(struct task_struct *curr, prev_root->trace = save_trace(); if (!prev_root->trace) return; - print_shortest_lock_dependencies(backwards_entry, prev_root); + print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);
pr_warn("\nthe dependencies between the lock to be acquired"); pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
From: Boqun Feng boqun.feng@gmail.com
[ Upstream commit 7b1f8c6179769af6ffa055e1169610b51d71edd5 ]
In the step #3 of check_irq_usage(), we seach backwards to find a lock whose usage conflicts the usage of @target_entry1 on safe/unsafe. However, we should only keep the irq-unsafe usage of @target_entry1 into consideration, because it could be a case where a lock is hardirq-unsafe but soft-safe, and in check_irq_usage() we find it because its hardirq-unsafe could result into a hardirq-safe-unsafe deadlock, but currently since we don't filter out the other usage bits, so we may find a lock dependency path softirq-unsafe -> softirq-safe, which in fact doesn't cause a deadlock. And this may cause misleading lockdep splats.
Fix this by only keeping LOCKF_ENABLED_IRQ_ALL bits when we try the backwards search.
Reported-by: Johannes Berg johannes@sipsolutions.net Signed-off-by: Boqun Feng boqun.feng@gmail.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lore.kernel.org/r/20210618170110.3699115-4-boqun.feng@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/locking/lockdep.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e51fa6575c39..39b6392b4826 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2773,8 +2773,18 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, * Step 3: we found a bad match! Now retrieve a lock from the backward * list whose usage mask matches the exclusive usage mask from the * lock found on the forward list. + * + * Note, we should only keep the LOCKF_ENABLED_IRQ_ALL bits, considering + * the follow case: + * + * When trying to add A -> B to the graph, we find that there is a + * hardirq-safe L, that L -> ... -> A, and another hardirq-unsafe M, + * that B -> ... -> M. However M is **softirq-safe**, if we use exact + * invert bits of M's usage_mask, we will find another lock N that is + * **softirq-unsafe** and N -> ... -> A, however N -> .. -> M will not + * cause a inversion deadlock. */ - backward_mask = original_mask(target_entry1->class->usage_mask); + backward_mask = original_mask(target_entry1->class->usage_mask & LOCKF_ENABLED_IRQ_ALL);
ret = find_usage_backwards(&this, backward_mask, &target_entry); if (bfs_error(ret)) {
From: Heiko Carstens hca@linux.ibm.com
[ Upstream commit 4fa3b91bdee1b08348c82660668ca0ca34e271ad ]
Using register asm statements has been proven to be very error prone, especially when using code instrumentation where gcc may add function calls, which clobbers register contents in an unexpected way.
Therefore get rid of register asm statements in kvm code, even though there is currently nothing wrong with them. This way we know for sure that this bug class won't be introduced here.
Signed-off-by: Heiko Carstens hca@linux.ibm.com Reviewed-by: Christian Borntraeger borntraeger@de.ibm.com Reviewed-by: Thomas Huth thuth@redhat.com Reviewed-by: Cornelia Huck cohuck@redhat.com Reviewed-by: Claudio Imbrenda imbrenda@linux.ibm.com Link: https://lore.kernel.org/r/20210621140356.1210771-1-hca@linux.ibm.com [borntraeger@de.ibm.com: checkpatch strict fix] Signed-off-by: Christian Borntraeger borntraeger@de.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kvm/kvm-s390.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1296fc10f80c..876fc1f7282a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -329,31 +329,31 @@ static void allow_cpu_feat(unsigned long nr)
static inline int plo_test_bit(unsigned char nr) { - register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; + unsigned long function = (unsigned long)nr | 0x100; int cc;
asm volatile( + " lgr 0,%[function]\n" /* Parameter registers are ignored for "test bit" */ " plo 0,0,0,0(0)\n" " ipm %0\n" " srl %0,28\n" : "=d" (cc) - : "d" (r0) - : "cc"); + : [function] "d" (function) + : "cc", "0"); return cc == 0; }
static __always_inline void __insn32_query(unsigned int opcode, u8 *query) { - register unsigned long r0 asm("0") = 0; /* query function */ - register unsigned long r1 asm("1") = (unsigned long) query; - asm volatile( - /* Parameter regs are ignored */ + " lghi 0,0\n" + " lgr 1,%[query]\n" + /* Parameter registers are ignored */ " .insn rrf,%[opc] << 16,2,4,6,0\n" : - : "d" (r0), "a" (r1), [opc] "i" (opcode) - : "cc", "memory"); + : [query] "d" ((unsigned long)query), [opc] "i" (opcode) + : "cc", "memory", "0", "1"); }
#define INSN_SORTL 0xb938
From: Hsin-Hsiung Wang hsin-hsiung.wang@mediatek.com
[ Upstream commit 50c9462edcbf900f3d5097ca3ad60171346124de ]
The valid vsel value are 0 and 12, so the .vsel_mask should be 0xf.
Signed-off-by: Hsin-Hsiung Wang hsin-hsiung.wang@mediatek.com Reviewed-by: Axel Lin axel.lin@ingics.com Link: https://lore.kernel.org/r/1624424169-510-1-git-send-email-hsin-hsiung.wang@m... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/regulator/mt6358-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c index 13cb6ac9a892..1d4eb5dc4fac 100644 --- a/drivers/regulator/mt6358-regulator.c +++ b/drivers/regulator/mt6358-regulator.c @@ -457,7 +457,7 @@ static struct mt6358_regulator_info mt6358_regulators[] = { MT6358_REG_FIXED("ldo_vaud28", VAUD28, MT6358_LDO_VAUD28_CON0, 0, 2800000), MT6358_LDO("ldo_vdram2", VDRAM2, vdram2_voltages, vdram2_idx, - MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0x10, 0), + MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0xf, 0), MT6358_LDO("ldo_vsim1", VSIM1, vsim_voltages, vsim_idx, MT6358_LDO_VSIM1_CON0, 0, MT6358_VSIM1_ANA_CON0, 0xf00, 8), MT6358_LDO("ldo_vibr", VIBR, vibr_voltages, vibr_idx,
From: Axel Lin axel.lin@ingics.com
[ Upstream commit a336dc8f683e5be794186b5643cd34cb28dd2c53 ]
Use DIV_ROUND_UP to prevent truncation by integer division issue. This ensures we return enough delay time.
Also fix returning negative value when new_sel < old_sel.
Signed-off-by: Axel Lin axel.lin@ingics.com Link: https://lore.kernel.org/r/20210618141412.4014912-1-axel.lin@ingics.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/regulator/da9052-regulator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/regulator/da9052-regulator.c b/drivers/regulator/da9052-regulator.c index e18d291c7f21..23fa429ebe76 100644 --- a/drivers/regulator/da9052-regulator.c +++ b/drivers/regulator/da9052-regulator.c @@ -250,7 +250,8 @@ static int da9052_regulator_set_voltage_time_sel(struct regulator_dev *rdev, case DA9052_ID_BUCK3: case DA9052_ID_LDO2: case DA9052_ID_LDO3: - ret = (new_sel - old_sel) * info->step_uV / 6250; + ret = DIV_ROUND_UP(abs(new_sel - old_sel) * info->step_uV, + 6250); break; }
From: Shuah Khan skhan@linuxfoundation.org
[ Upstream commit 50e7a31d30e8221632675abed3be306382324ca2 ]
Smatch static checker warns that "mdev" can be null:
sound/usb/media.c:287 snd_media_device_create() warn: 'mdev' can also be NULL
If CONFIG_MEDIA_CONTROLLER is disabled, this file should not be included in the build.
The below conditions in the sound/usb/Makefile are in place to ensure that media.c isn't included in the build.
sound/usb/Makefile: snd-usb-audio-$(CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER) += media.o
select SND_USB_AUDIO_USE_MEDIA_CONTROLLER if MEDIA_CONTROLLER && (MEDIA_SUPPORT=y || MEDIA_SUPPORT=SND_USB_AUDIO)
The following config check in include/media/media-dev-allocator.h is in place to enable the API only when CONFIG_MEDIA_CONTROLLER and CONFIG_USB are enabled.
#if defined(CONFIG_MEDIA_CONTROLLER) && defined(CONFIG_USB)
This check doesn't work as intended when CONFIG_USB=m. When CONFIG_USB=m, CONFIG_USB_MODULE is defined and CONFIG_USB is not. The above config check doesn't catch that CONFIG_USB is defined as a module and disables the API. This results in sound/usb enabling Media Controller specific ALSA driver code, while Media disables the Media Controller API.
Fix the problem requires two changes:
1. Change the check to use IS_ENABLED to detect when CONFIG_USB is enabled as a module or static. Since CONFIG_MEDIA_CONTROLLER is a bool, leave the check unchanged to be consistent with drivers/media/Makefile.
2. Change the drivers/media/mc/Makefile to include mc-dev-allocator.o in mc-objs when CONFIG_USB is enabled.
Link: https://lore.kernel.org/alsa-devel/YLeAvT+R22FQ%2FEyw@mwanda/
Reported-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/mc/Makefile | 2 +- include/media/media-dev-allocator.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/media/mc/Makefile b/drivers/media/mc/Makefile index 119037f0e686..2b7af42ba59c 100644 --- a/drivers/media/mc/Makefile +++ b/drivers/media/mc/Makefile @@ -3,7 +3,7 @@ mc-objs := mc-device.o mc-devnode.o mc-entity.o \ mc-request.o
-ifeq ($(CONFIG_USB),y) +ifneq ($(CONFIG_USB),) mc-objs += mc-dev-allocator.o endif
diff --git a/include/media/media-dev-allocator.h b/include/media/media-dev-allocator.h index b35ea6062596..2ab54d426c64 100644 --- a/include/media/media-dev-allocator.h +++ b/include/media/media-dev-allocator.h @@ -19,7 +19,7 @@
struct usb_device;
-#if defined(CONFIG_MEDIA_CONTROLLER) && defined(CONFIG_USB) +#if defined(CONFIG_MEDIA_CONTROLLER) && IS_ENABLED(CONFIG_USB) /** * media_device_usb_allocate() - Allocate and return struct &media device *
From: Rodrigo Campos rodrigo@kinvolk.io
[ Upstream commit 0ae71c7720e3ae3aabd2e8a072d27f7bd173d25c ]
Alban Crequy reported a race condition userspace faces when we want to add some fds and make the syscall return them[1] using seccomp notify.
The problem is that currently two different ioctl() calls are needed by the process handling the syscalls (agent) for another userspace process (target): SECCOMP_IOCTL_NOTIF_ADDFD to allocate the fd and SECCOMP_IOCTL_NOTIF_SEND to return that value. Therefore, it is possible for the agent to do the first ioctl to add a file descriptor but the target is interrupted (EINTR) before the agent does the second ioctl() call.
This patch adds a flag to the ADDFD ioctl() so it adds the fd and returns that value atomically to the target program, as suggested by Kees Cook[2]. This is done by simply allowing seccomp_do_user_notification() to add the fd and return it in this case. Therefore, in this case the target wakes up from the wait in seccomp_do_user_notification() either to interrupt the syscall or to add the fd and return it.
This "allocate an fd and return" functionality is useful for syscalls that return a file descriptor only, like connect(2). Other syscalls that return a file descriptor but not as return value (or return more than one fd), like socketpair(), pipe(), recvmsg with SCM_RIGHTs, will not work with this flag.
This effectively combines SECCOMP_IOCTL_NOTIF_ADDFD and SECCOMP_IOCTL_NOTIF_SEND into an atomic opteration. The notification's return value, nor error can be set by the user. Upon successful invocation of the SECCOMP_IOCTL_NOTIF_ADDFD ioctl with the SECCOMP_ADDFD_FLAG_SEND flag, the notifying process's errno will be 0, and the return value will be the file descriptor number that was installed.
[1]: https://lore.kernel.org/lkml/CADZs7q4sw71iNHmV8EOOXhUKJMORPzF7thraxZYddTZsxt... [2]: https://lore.kernel.org/lkml/202012011322.26DCBC64F2@keescook/
Signed-off-by: Rodrigo Campos rodrigo@kinvolk.io Signed-off-by: Sargun Dhillon sargun@sargun.me Acked-by: Tycho Andersen tycho@tycho.pizza Acked-by: Christian Brauner christian.brauner@ubuntu.com Signed-off-by: Kees Cook keescook@chromium.org Link: https://lore.kernel.org/r/20210517193908.3113-4-sargun@sargun.me Signed-off-by: Sasha Levin sashal@kernel.org --- .../userspace-api/seccomp_filter.rst | 12 +++++ include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 51 ++++++++++++++++--- 3 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index 6efb41cc8072..d61219889e49 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct seccomp_notif``.
+Userspace can also add file descriptors to the notifying process via +``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of +``struct seccomp_notif_addfd`` should be the same ``id`` as in +``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags +like O_EXEC on the file descriptor in the notifying process. If the supervisor +wants to inject the file descriptor with a specific number, the +``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to +the specific number to use. If that file descriptor is already open in the +notifying process it will be replaced. The supervisor can also add an FD, and +respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return +value will be the injected file descriptor number. + It is worth noting that ``struct seccomp_data`` contains the values of register arguments to the syscall, but does not contain pointers to memory. The task's memory is accessible to suitably privileged traces via ``ptrace()`` or diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 6ba18b82a02e..78074254ab98 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -115,6 +115,7 @@ struct seccomp_notif_resp {
/* valid flags for seccomp_notif_addfd */ #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
/** * struct seccomp_notif_addfd diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 9f58049ac16d..057e17f3215d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -107,6 +107,7 @@ struct seccomp_knotif { * installing process should allocate the fd as normal. * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC * is allowed. + * @ioctl_flags: The flags used for the seccomp_addfd ioctl. * @ret: The return value of the installing process. It is set to the fd num * upon success (>= 0). * @completion: Indicates that the installing process has completed fd @@ -118,6 +119,7 @@ struct seccomp_kaddfd { struct file *file; int fd; unsigned int flags; + __u32 ioctl_flags;
union { bool setfd; @@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter) return filter->notif->next_id++; }
-static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) +static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n) { + int fd; + /* * Remove the notification, and reset the list pointers, indicating * that it has been handled. */ list_del_init(&addfd->list); if (!addfd->setfd) - addfd->ret = receive_fd(addfd->file, addfd->flags); + fd = receive_fd(addfd->file, addfd->flags); else - addfd->ret = receive_fd_replace(addfd->fd, addfd->file, - addfd->flags); + fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags); + addfd->ret = fd; + + if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) { + /* If we fail reset and return an error to the notifier */ + if (fd < 0) { + n->state = SECCOMP_NOTIFY_SENT; + } else { + /* Return the FD we just added */ + n->flags = 0; + n->error = 0; + n->val = fd; + } + } + + /* + * Mark the notification as completed. From this point, addfd mem + * might be invalidated and we can't safely read it anymore. + */ complete(&addfd->completion); }
@@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall, struct seccomp_kaddfd, list); /* Check if we were woken up by a addfd message */ if (addfd) - seccomp_handle_addfd(addfd); + seccomp_handle_addfd(addfd, &n);
} while (n.state != SECCOMP_NOTIFY_REPLIED);
@@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, if (addfd.newfd_flags & ~O_CLOEXEC) return -EINVAL;
- if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD) + if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND)) return -EINVAL;
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD)) @@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, if (!kaddfd.file) return -EBADF;
+ kaddfd.ioctl_flags = addfd.flags; kaddfd.flags = addfd.newfd_flags; kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD; kaddfd.fd = addfd.newfd; @@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, goto out_unlock; }
+ if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) { + /* + * Disallow queuing an atomic addfd + send reply while there are + * some addfd requests still to process. + * + * There is no clear reason to support it and allows us to keep + * the loop on the other side straight-forward. + */ + if (!list_empty(&knotif->addfd)) { + ret = -EBUSY; + goto out_unlock; + } + + /* Allow exactly only one reply */ + knotif->state = SECCOMP_NOTIFY_REPLIED; + } + list_add(&kaddfd.list, &knotif->addfd); complete(&knotif->ready); mutex_unlock(&filter->notify_lock);
linux-stable-mirror@lists.linaro.org