From: Casey Schaufler casey@schaufler-ca.com
[ Upstream commit ecff30575b5ad0eda149aadad247b7f75411fd47 ]
The usual LSM hook "bail on fail" scheme doesn't work for cases where a security module may return an error code indicating that it does not recognize an input. In this particular case Smack sees a mount option that it recognizes, and returns 0. A call to a BPF hook follows, which returns -ENOPARAM, which confuses the caller because Smack has processed its data.
The SELinux hook incorrectly returns 1 on success. There was a time when this was correct, however the current expectation is that it return 0 on success. This is repaired.
Reported-by: syzbot+d1e3b1d92d25abf97943@syzkaller.appspotmail.com Signed-off-by: Casey Schaufler casey@schaufler-ca.com Acked-by: James Morris jamorris@linux.microsoft.com Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/security.c | 17 +++++++++++++++-- security/selinux/hooks.c | 5 ++--- 2 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/security/security.c b/security/security.c index a864ff824dd3..d9d42d64f89f 100644 --- a/security/security.c +++ b/security/security.c @@ -860,9 +860,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) return call_int_hook(fs_context_dup, 0, fc, src_fc); }
-int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) +int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) { - return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); + struct security_hook_list *hp; + int trc; + int rc = -ENOPARAM; + + hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param, + list) { + trc = hp->hook.fs_context_parse_param(fc, param); + if (trc == 0) + rc = 0; + else if (trc != -ENOPARAM) + return trc; + } + return rc; }
int security_sb_alloc(struct super_block *sb) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 86159b32921c..63e61f2f1ad6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2820,10 +2820,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc, return opt;
rc = selinux_add_opt(opt, param->string, &fc->security); - if (!rc) { + if (!rc) param->string = NULL; - rc = 1; - } + return rc; }
From: Dave Stevenson dave.stevenson@raspberrypi.com
[ Upstream commit 5665eee7a3800430e7dc3ef6f25722476b603186 ]
The Atmel is doing some things in the I2C ISR, during which period it will not respond to further commands. This is particularly true of the POWERON command.
Increase delays appropriately, and retry should I2C errors be reported.
Signed-off-by: Dave Stevenson dave.stevenson@raspberrypi.com Signed-off-by: Detlev Casanova detlev.casanova@collabora.com Link: https://lore.kernel.org/r/20220124220129.158891-3-detlev.casanova@collabora.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../regulator/rpi-panel-attiny-regulator.c | 56 +++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-)
diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index ee46bfbf5eee..991b4730d768 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -37,11 +37,24 @@ static const struct regmap_config attiny_regmap_config = { static int attiny_lcd_power_enable(struct regulator_dev *rdev) { unsigned int data; + int ret, i;
regmap_write(rdev->regmap, REG_POWERON, 1); + msleep(80); + /* Wait for nPWRDWN to go low to indicate poweron is done. */ - regmap_read_poll_timeout(rdev->regmap, REG_PORTB, data, - data & BIT(0), 10, 1000000); + for (i = 0; i < 20; i++) { + ret = regmap_read(rdev->regmap, REG_PORTB, &data); + if (!ret) { + if (data & BIT(0)) + break; + } + usleep_range(10000, 12000); + } + usleep_range(10000, 12000); + + if (ret) + pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret);
/* Default to the same orientation as the closed source * firmware used for the panel. Runtime rotation @@ -57,23 +70,34 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev) { regmap_write(rdev->regmap, REG_PWM, 0); regmap_write(rdev->regmap, REG_POWERON, 0); - udelay(1); + msleep(30); return 0; }
static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev) { unsigned int data; - int ret; + int ret, i;
- ret = regmap_read(rdev->regmap, REG_POWERON, &data); + for (i = 0; i < 10; i++) { + ret = regmap_read(rdev->regmap, REG_POWERON, &data); + if (!ret) + break; + usleep_range(10000, 12000); + } if (ret < 0) return ret;
if (!(data & BIT(0))) return 0;
- ret = regmap_read(rdev->regmap, REG_PORTB, &data); + for (i = 0; i < 10; i++) { + ret = regmap_read(rdev->regmap, REG_PORTB, &data); + if (!ret) + break; + usleep_range(10000, 12000); + } + if (ret < 0) return ret;
@@ -103,20 +127,32 @@ static int attiny_update_status(struct backlight_device *bl) { struct regmap *regmap = bl_get_data(bl); int brightness = bl->props.brightness; + int ret, i;
if (bl->props.power != FB_BLANK_UNBLANK || bl->props.fb_blank != FB_BLANK_UNBLANK) brightness = 0;
- return regmap_write(regmap, REG_PWM, brightness); + for (i = 0; i < 10; i++) { + ret = regmap_write(regmap, REG_PWM, brightness); + if (!ret) + break; + } + + return ret; }
static int attiny_get_brightness(struct backlight_device *bl) { struct regmap *regmap = bl_get_data(bl); - int ret, brightness; + int ret, brightness, i; + + for (i = 0; i < 10; i++) { + ret = regmap_read(regmap, REG_PWM, &brightness); + if (!ret) + break; + }
- ret = regmap_read(regmap, REG_PWM, &brightness); if (ret) return ret;
@@ -166,7 +202,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c, }
regmap_write(regmap, REG_POWERON, 0); - mdelay(1); + msleep(30);
config.dev = &i2c->dev; config.regmap = regmap;
From: Kees Cook keescook@chromium.org
[ Upstream commit 27e9faf415dbf94af19b9c827842435edbc1fbbc ]
Since STRING_CST may not be NUL terminated, strncmp() was used for check for equality. However, this may lead to mismatches for longer section names where the start matches the tested-for string. Test for exact equality by checking for the presences of NUL termination.
Cc: Alexander Popov alex.popov@linux.com Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Sasha Levin sashal@kernel.org --- scripts/gcc-plugins/stackleak_plugin.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index 48e141e07956..dacd697ffd38 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -431,6 +431,23 @@ static unsigned int stackleak_cleanup_execute(void) return 0; }
+/* + * STRING_CST may or may not be NUL terminated: + * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html + */ +static inline bool string_equal(tree node, const char *string, int length) +{ + if (TREE_STRING_LENGTH(node) < length) + return false; + if (TREE_STRING_LENGTH(node) > length + 1) + return false; + if (TREE_STRING_LENGTH(node) == length + 1 && + TREE_STRING_POINTER(node)[length] != '\0') + return false; + return !memcmp(TREE_STRING_POINTER(node), string, length); +} +#define STRING_EQUAL(node, str) string_equal(node, str, strlen(str)) + static bool stackleak_gate(void) { tree section; @@ -440,13 +457,13 @@ static bool stackleak_gate(void) if (section && TREE_VALUE(section)) { section = TREE_VALUE(TREE_VALUE(section));
- if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10)) + if (STRING_EQUAL(section, ".init.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13)) + if (STRING_EQUAL(section, ".devinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13)) + if (STRING_EQUAL(section, ".cpuinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13)) + if (STRING_EQUAL(section, ".meminit.text")) return false; }
From: Marc Zyngier maz@kernel.org
[ Upstream commit f7e53e2255808ca3abcc8f38d18ad0823425e771 ]
The npcm driver has a bunch of references to the irq_chip parent_device field, but never sets it.
Fix it by fishing that reference from somewhere else, but it is obvious that these debug statements were never used. Also remove an unused field in a local data structure.
Signed-off-by: Marc Zyngier maz@kernel.org Acked-by: Bartosz Golaszewski brgl@bgdev.pl Link: https://lore.kernel.org/r/20220201120310.878267-11-maz@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c | 25 +++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 6de31b5ee358..c359e25519f8 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -78,7 +78,6 @@ struct npcm7xx_gpio { struct gpio_chip gc; int irqbase; int irq; - void *priv; struct irq_chip irq_chip; u32 pinctrl_id; int (*direction_input)(struct gpio_chip *chip, unsigned offset); @@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); sts = ioread32(bank->base + NPCM7XX_GP_N_EVST); en = ioread32(bank->base + NPCM7XX_GP_N_EVEN); - dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts, + dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts, en);
sts &= en; @@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = BIT(d->hwirq);
- dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio, + dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio, d->irq, type); switch (type) { case IRQ_TYPE_EDGE_RISING: - dev_dbg(d->chip->parent_device, "edge.rising\n"); + dev_dbg(bank->gc.parent, "edge.rising\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_FALLING: - dev_dbg(d->chip->parent_device, "edge.falling\n"); + dev_dbg(bank->gc.parent, "edge.falling\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_BOTH: - dev_dbg(d->chip->parent_device, "edge.both\n"); + dev_dbg(bank->gc.parent, "edge.both\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); break; case IRQ_TYPE_LEVEL_LOW: - dev_dbg(d->chip->parent_device, "level.low\n"); + dev_dbg(bank->gc.parent, "level.low\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_LEVEL_HIGH: - dev_dbg(d->chip->parent_device, "level.high\n"); + dev_dbg(bank->gc.parent, "level.high\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; default: - dev_dbg(d->chip->parent_device, "invalid irq type\n"); + dev_dbg(bank->gc.parent, "invalid irq type\n"); return -EINVAL; }
@@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = d->hwirq;
- dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST); }
@@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d) unsigned int gpio = d->hwirq;
/* Clear events */ - dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC); }
@@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d) unsigned int gpio = d->hwirq;
/* Enable events */ - dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS); }
@@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d) unsigned int gpio = d->hwirq;
/* active-high, input, clear interrupt, enable interrupt */ - dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq); + dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq); npcmgpio_direction_input(gc, gpio); npcmgpio_irq_ack(d); npcmgpio_irq_unmask(d);
From: Yu Kuai yukuai3@huawei.com
[ Upstream commit 8410f70977734f21b8ed45c37e925d311dfda2e7 ]
Our test report a UAF:
[ 2073.019181] ================================================================== [ 2073.019188] BUG: KASAN: use-after-free in __bfq_put_async_bfqq+0xa0/0x168 [ 2073.019191] Write of size 8 at addr ffff8000ccf64128 by task rmmod/72584 [ 2073.019192] [ 2073.019196] CPU: 0 PID: 72584 Comm: rmmod Kdump: loaded Not tainted 4.19.90-yk #5 [ 2073.019198] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 2073.019200] Call trace: [ 2073.019203] dump_backtrace+0x0/0x310 [ 2073.019206] show_stack+0x28/0x38 [ 2073.019210] dump_stack+0xec/0x15c [ 2073.019216] print_address_description+0x68/0x2d0 [ 2073.019220] kasan_report+0x238/0x2f0 [ 2073.019224] __asan_store8+0x88/0xb0 [ 2073.019229] __bfq_put_async_bfqq+0xa0/0x168 [ 2073.019233] bfq_put_async_queues+0xbc/0x208 [ 2073.019236] bfq_pd_offline+0x178/0x238 [ 2073.019240] blkcg_deactivate_policy+0x1f0/0x420 [ 2073.019244] bfq_exit_queue+0x128/0x178 [ 2073.019249] blk_mq_exit_sched+0x12c/0x160 [ 2073.019252] elevator_exit+0xc8/0xd0 [ 2073.019256] blk_exit_queue+0x50/0x88 [ 2073.019259] blk_cleanup_queue+0x228/0x3d8 [ 2073.019267] null_del_dev+0xfc/0x1e0 [null_blk] [ 2073.019274] null_exit+0x90/0x114 [null_blk] [ 2073.019278] __arm64_sys_delete_module+0x358/0x5a0 [ 2073.019282] el0_svc_common+0xc8/0x320 [ 2073.019287] el0_svc_handler+0xf8/0x160 [ 2073.019290] el0_svc+0x10/0x218 [ 2073.019291] [ 2073.019294] Allocated by task 14163: [ 2073.019301] kasan_kmalloc+0xe0/0x190 [ 2073.019305] kmem_cache_alloc_node_trace+0x1cc/0x418 [ 2073.019308] bfq_pd_alloc+0x54/0x118 [ 2073.019313] blkcg_activate_policy+0x250/0x460 [ 2073.019317] bfq_create_group_hierarchy+0x38/0x110 [ 2073.019321] bfq_init_queue+0x6d0/0x948 [ 2073.019325] blk_mq_init_sched+0x1d8/0x390 [ 2073.019330] elevator_switch_mq+0x88/0x170 [ 2073.019334] elevator_switch+0x140/0x270 [ 2073.019338] elv_iosched_store+0x1a4/0x2a0 [ 2073.019342] queue_attr_store+0x90/0xe0 [ 2073.019348] sysfs_kf_write+0xa8/0xe8 [ 2073.019351] kernfs_fop_write+0x1f8/0x378 [ 2073.019359] __vfs_write+0xe0/0x360 [ 2073.019363] vfs_write+0xf0/0x270 [ 2073.019367] ksys_write+0xdc/0x1b8 [ 2073.019371] __arm64_sys_write+0x50/0x60 [ 2073.019375] el0_svc_common+0xc8/0x320 [ 2073.019380] el0_svc_handler+0xf8/0x160 [ 2073.019383] el0_svc+0x10/0x218 [ 2073.019385] [ 2073.019387] Freed by task 72584: [ 2073.019391] __kasan_slab_free+0x120/0x228 [ 2073.019394] kasan_slab_free+0x10/0x18 [ 2073.019397] kfree+0x94/0x368 [ 2073.019400] bfqg_put+0x64/0xb0 [ 2073.019404] bfqg_and_blkg_put+0x90/0xb0 [ 2073.019408] bfq_put_queue+0x220/0x228 [ 2073.019413] __bfq_put_async_bfqq+0x98/0x168 [ 2073.019416] bfq_put_async_queues+0xbc/0x208 [ 2073.019420] bfq_pd_offline+0x178/0x238 [ 2073.019424] blkcg_deactivate_policy+0x1f0/0x420 [ 2073.019429] bfq_exit_queue+0x128/0x178 [ 2073.019433] blk_mq_exit_sched+0x12c/0x160 [ 2073.019437] elevator_exit+0xc8/0xd0 [ 2073.019440] blk_exit_queue+0x50/0x88 [ 2073.019443] blk_cleanup_queue+0x228/0x3d8 [ 2073.019451] null_del_dev+0xfc/0x1e0 [null_blk] [ 2073.019459] null_exit+0x90/0x114 [null_blk] [ 2073.019462] __arm64_sys_delete_module+0x358/0x5a0 [ 2073.019467] el0_svc_common+0xc8/0x320 [ 2073.019471] el0_svc_handler+0xf8/0x160 [ 2073.019474] el0_svc+0x10/0x218 [ 2073.019475] [ 2073.019479] The buggy address belongs to the object at ffff8000ccf63f00 which belongs to the cache kmalloc-1024 of size 1024 [ 2073.019484] The buggy address is located 552 bytes inside of 1024-byte region [ffff8000ccf63f00, ffff8000ccf64300) [ 2073.019486] The buggy address belongs to the page: [ 2073.019492] page:ffff7e000333d800 count:1 mapcount:0 mapping:ffff8000c0003a00 index:0x0 compound_mapcount: 0 [ 2073.020123] flags: 0x7ffff0000008100(slab|head) [ 2073.020403] raw: 07ffff0000008100 ffff7e0003334c08 ffff7e00001f5a08 ffff8000c0003a00 [ 2073.020409] raw: 0000000000000000 00000000001c001c 00000001ffffffff 0000000000000000 [ 2073.020411] page dumped because: kasan: bad access detected [ 2073.020412] [ 2073.020414] Memory state around the buggy address: [ 2073.020420] ffff8000ccf64000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020424] ffff8000ccf64080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020428] >ffff8000ccf64100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020430] ^ [ 2073.020434] ffff8000ccf64180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020438] ffff8000ccf64200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020439] ==================================================================
The same problem exist in mainline as well.
This is because oom_bfqq is moved to a non-root group, thus root_group is freed earlier.
Thus fix the problem by don't move oom_bfqq.
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jan Kara jack@suse.cz Acked-by: Paolo Valente paolo.valente@linaro.org Link: https://lore.kernel.org/r/20220129015924.3958918-4-yukuai3@huawei.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/bfq-cgroup.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index b791e2041e49..c2fdd6fcdaee 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity;
+ /* + * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group + * until elevator exit. + */ + if (bfqq == &bfqd->oom_bfqq) + return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate.
From: Christian Göttsche cgzones@googlemail.com
[ Upstream commit b97df7c098c531010e445da88d02b7bf7bf59ef6 ]
security_sid_to_context() expects a pointer to an u32 as the address where to store the length of the computed context.
Reported by sparse:
security/selinux/xfrm.c:359:39: warning: incorrect type in arg 4 (different signedness) security/selinux/xfrm.c:359:39: expected unsigned int [usertype] *scontext_len security/selinux/xfrm.c:359:39: got int *
Signed-off-by: Christian Göttsche cgzones@googlemail.com [PM: wrapped commit description] Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/selinux/xfrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 7314196185d1..00e95f8bd7c7 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -346,7 +346,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, int rc; struct xfrm_sec_ctx *ctx; char *ctx_str = NULL; - int str_len; + u32 str_len;
if (!polsec) return 0;
From: Richard Haines richard_c_haines@btinternet.com
[ Upstream commit 65881e1db4e948614d9eb195b8e1197339822949 ]
These ioctls are equivalent to fcntl(fd, F_SETFD, flags), which SELinux always allows too. Furthermore, a failed FIOCLEX could result in a file descriptor being leaked to a process that should not have access to it.
As this patch removes access controls, a policy capability needs to be enabled in policy to always allow these ioctls.
Based-on-patch-by: Demi Marie Obenour demiobenour@gmail.com Signed-off-by: Richard Haines richard_c_haines@btinternet.com [PM: subject line tweak] Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/selinux/hooks.c | 6 ++++++ security/selinux/include/policycap.h | 1 + security/selinux/include/policycap_names.h | 3 ++- security/selinux/include/security.h | 7 +++++++ 4 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 63e61f2f1ad6..8c901ae05dd8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3647,6 +3647,12 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, CAP_OPT_NONE, true); break;
+ case FIOCLEX: + case FIONCLEX: + if (!selinux_policycap_ioctl_skip_cloexec()) + error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd); + break; + /* default case assumes that the command will go * to the file's ioctl() function. */ diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h index 2ec038efbb03..a9e572ca4fd9 100644 --- a/security/selinux/include/policycap.h +++ b/security/selinux/include/policycap.h @@ -11,6 +11,7 @@ enum { POLICYDB_CAPABILITY_CGROUPSECLABEL, POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION, POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS, + POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h index b89289f092c9..ebd64afe1def 100644 --- a/security/selinux/include/policycap_names.h +++ b/security/selinux/include/policycap_names.h @@ -12,7 +12,8 @@ const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "always_check_network", "cgroup_seclabel", "nnp_nosuid_transition", - "genfs_seclabel_symlinks" + "genfs_seclabel_symlinks", + "ioctl_skip_cloexec" };
#endif /* _SELINUX_POLICYCAP_NAMES_H_ */ diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 63ca6e79daeb..1521460a97d4 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -219,6 +219,13 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void) return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); }
+static inline bool selinux_policycap_ioctl_skip_cloexec(void) +{ + struct selinux_state *state = &selinux_state; + + return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC]); +} + struct selinux_policy_convert_data;
struct selinux_load_state {
From: Chaitanya Kulkarni kch@nvidia.com
[ Upstream commit b27824d31f09ea7b4a6ba2c1b18bd328df3e8bed ]
sprintf does not know the PAGE_SIZE maximum of the temporary buffer used for outputting sysfs content and it's possible to overrun the PAGE_SIZE buffer length.
Use a generic sysfs_emit function that knows the size of the temporary buffer and ensures that no overrun is done for offset attribute in loop_attr_[offset|sizelimit|autoclear|partscan|dio]_show() callbacks.
Signed-off-by: Chaitanya Kulkarni kch@nvidia.com Reviewed-by: Himanshu Madhani himanshu.madhani@oracle.com Link: https://lore.kernel.org/r/20220215213310.7264-2-kch@nvidia.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/block/loop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ee537a9f1d1a..e4517d483bdc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -797,33 +797,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset); }
static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); }
static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) { int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
- return sprintf(buf, "%s\n", autoclear ? "1" : "0"); + return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0"); }
static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) { int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
- return sprintf(buf, "%s\n", partscan ? "1" : "0"); + return sysfs_emit(buf, "%s\n", partscan ? "1" : "0"); }
static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) { int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
- return sprintf(buf, "%s\n", dio ? "1" : "0"); + return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); }
LOOP_ATTR_RO(backing_file);
From: Casey Schaufler casey@schaufler-ca.com
[ Upstream commit a5cd1ab7ab679d252a6d2f483eee7d45ebf2040c ]
Remove inappropriate use of ntohs() and assign the port value directly.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Casey Schaufler casey@schaufler-ca.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/smack/smack_lsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 5c90b9fa4d40..b36b8668f1f4 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2506,7 +2506,7 @@ static int smk_ipv6_check(struct smack_known *subject, #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); ad.a.u.net->family = PF_INET6; - ad.a.u.net->dport = ntohs(address->sin6_port); + ad.a.u.net->dport = address->sin6_port; if (act == SMK_RECEIVING) ad.a.u.net->v6info.saddr = address->sin6_addr; else
From: Marc Zyngier maz@kernel.org
[ Upstream commit a6aca2f460e203781dc41391913cc5b54f4bc0ce ]
pdc_enable_intr() serves as a primitive to qcom_pdc_gic_{en,dis}able, and has a raw spinlock for mutual exclusion, which is uses with interruptible primitives.
This means that this critical section can itself be interrupted. Should the interrupt also be a PDC interrupt, and the endpoint driver perform an irq_disable() on that interrupt, we end-up in a deadlock.
Fix this by using the irqsave/irqrestore variants of the locking primitives.
Signed-off-by: Marc Zyngier maz@kernel.org Reviewed-by: Maulik Shah quic_mkshah@quicinc.com Link: https://lore.kernel.org/r/20220224101226.88373-5-maz@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/qcom-pdc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 5dc63c20b67e..fc747b7f4983 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -74,17 +74,18 @@ static int qcom_pdc_gic_set_irqchip_state(struct irq_data *d, static void pdc_enable_intr(struct irq_data *d, bool on) { int pin_out = d->hwirq; + unsigned long flags; u32 index, mask; u32 enable;
index = pin_out / 32; mask = pin_out % 32;
- raw_spin_lock(&pdc_lock); + raw_spin_lock_irqsave(&pdc_lock, flags); enable = pdc_reg_read(IRQ_ENABLE_BANK, index); enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask); pdc_reg_write(IRQ_ENABLE_BANK, index, enable); - raw_spin_unlock(&pdc_lock); + raw_spin_unlock_irqrestore(&pdc_lock, flags); }
static void qcom_pdc_gic_disable(struct irq_data *d)
From: "Souptick Joarder (HPE)" jrdr.linux@gmail.com
[ Upstream commit e414c25e3399b2b3d7337dc47abccab5c71b7c8f ]
smatch warning was reported as below ->
smatch warnings: drivers/irqchip/irq-nvic.c:131 nvic_of_init() warn: 'nvic_base' not released on lines: 97.
Release nvic_base upon failure.
Reported-by: kernel test robot lkp@intel.com Reported-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Souptick Joarder (HPE) jrdr.linux@gmail.com Signed-off-by: Marc Zyngier maz@kernel.org Link: https://lore.kernel.org/r/20220218163303.33344-1-jrdr.linux@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-nvic.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index 21cb31ff2bbf..e903c44edb64 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -94,6 +94,7 @@ static int __init nvic_of_init(struct device_node *node,
if (!nvic_irq_domain) { pr_warn("Failed to allocate irq domain\n"); + iounmap(nvic_base); return -ENOMEM; }
@@ -103,6 +104,7 @@ static int __init nvic_of_init(struct device_node *node, if (ret) { pr_warn("Failed to allocate irq chips\n"); irq_domain_remove(nvic_irq_domain); + iounmap(nvic_base); return ret; }
From: Akira Kawata akirakawata1@gmail.com
[ Upstream commit 0da1d5002745cdc721bc018b582a8a9704d56c42 ]
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=197921
As pointed out in the discussion of buglink, we cannot calculate AT_PHDR as the sum of load_addr and exec->e_phoff.
: The AT_PHDR of ELF auxiliary vectors should point to the memory address : of program header. But binfmt_elf.c calculates this address as follows: : : NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff); : : which is wrong since e_phoff is the file offset of program header and : load_addr is the memory base address from PT_LOAD entry. : : The ld.so uses AT_PHDR as the memory address of program header. In normal : case, since the e_phoff is usually 64 and in the first PT_LOAD region, it : is the correct program header address. : : But if the address of program header isn't equal to the first PT_LOAD : address + e_phoff (e.g. Put the program header in other non-consecutive : PT_LOAD region), ld.so will try to read program header from wrong address : then crash or use incorrect program header.
This is because exec->e_phoff is the offset of PHDRs in the file and the address of PHDRs in the memory may differ from it. This patch fixes the bug by calculating the address of program headers from PT_LOADs directly.
Signed-off-by: Akira Kawata akirakawata1@gmail.com Reported-by: kernel test robot lkp@intel.com Acked-by: Kees Cook keescook@chromium.org Signed-off-by: Kees Cook keescook@chromium.org Link: https://lore.kernel.org/r/20220127124014.338760-2-akirakawata1@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/binfmt_elf.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 04c4aa7a1df2..ed507d27034b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -170,8 +170,8 @@ static int padzero(unsigned long elf_bss)
static int create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, - unsigned long load_addr, unsigned long interp_load_addr, - unsigned long e_entry) + unsigned long interp_load_addr, + unsigned long e_entry, unsigned long phdr_addr) { struct mm_struct *mm = current->mm; unsigned long p = bprm->p; @@ -256,7 +256,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); - NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff); + NEW_AUX_ENT(AT_PHDR, phdr_addr); NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr)); NEW_AUX_ENT(AT_PHNUM, exec->e_phnum); NEW_AUX_ENT(AT_BASE, interp_load_addr); @@ -820,7 +820,7 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr, static int load_elf_binary(struct linux_binprm *bprm) { struct file *interpreter = NULL; /* to shut gcc up */ - unsigned long load_addr = 0, load_bias = 0; + unsigned long load_addr, load_bias = 0, phdr_addr = 0; int load_addr_set = 0; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; @@ -1153,6 +1153,17 @@ static int load_elf_binary(struct linux_binprm *bprm) reloc_func_desc = load_bias; } } + + /* + * Figure out which segment in the file contains the Program + * Header table, and map to the associated memory address. + */ + if (elf_ppnt->p_offset <= elf_ex->e_phoff && + elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) { + phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset + + elf_ppnt->p_vaddr; + } + k = elf_ppnt->p_vaddr; if ((elf_ppnt->p_flags & PF_X) && k < start_code) start_code = k; @@ -1188,6 +1199,7 @@ static int load_elf_binary(struct linux_binprm *bprm) }
e_entry = elf_ex->e_entry + load_bias; + phdr_addr += load_bias; elf_bss += load_bias; elf_brk += load_bias; start_code += load_bias; @@ -1251,8 +1263,8 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
- retval = create_elf_tables(bprm, elf_ex, - load_addr, interp_load_addr, e_entry); + retval = create_elf_tables(bprm, elf_ex, interp_load_addr, + e_entry, phdr_addr); if (retval < 0) goto out;
From: Zhang Wensheng zhangwensheng5@huawei.com
[ Upstream commit ab552fcb17cc9e4afe0e4ac4df95fc7b30e8490a ]
KASAN reports a use-after-free report when doing normal scsi-mq test
[69832.239032] ================================================================== [69832.241810] BUG: KASAN: use-after-free in bfq_dispatch_request+0x1045/0x44b0 [69832.243267] Read of size 8 at addr ffff88802622ba88 by task kworker/3:1H/155 [69832.244656] [69832.245007] CPU: 3 PID: 155 Comm: kworker/3:1H Not tainted 5.10.0-10295-g576c6382529e #8 [69832.246626] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [69832.249069] Workqueue: kblockd blk_mq_run_work_fn [69832.250022] Call Trace: [69832.250541] dump_stack+0x9b/0xce [69832.251232] ? bfq_dispatch_request+0x1045/0x44b0 [69832.252243] print_address_description.constprop.6+0x3e/0x60 [69832.253381] ? __cpuidle_text_end+0x5/0x5 [69832.254211] ? vprintk_func+0x6b/0x120 [69832.254994] ? bfq_dispatch_request+0x1045/0x44b0 [69832.255952] ? bfq_dispatch_request+0x1045/0x44b0 [69832.256914] kasan_report.cold.9+0x22/0x3a [69832.257753] ? bfq_dispatch_request+0x1045/0x44b0 [69832.258755] check_memory_region+0x1c1/0x1e0 [69832.260248] bfq_dispatch_request+0x1045/0x44b0 [69832.261181] ? bfq_bfqq_expire+0x2440/0x2440 [69832.262032] ? blk_mq_delay_run_hw_queues+0xf9/0x170 [69832.263022] __blk_mq_do_dispatch_sched+0x52f/0x830 [69832.264011] ? blk_mq_sched_request_inserted+0x100/0x100 [69832.265101] __blk_mq_sched_dispatch_requests+0x398/0x4f0 [69832.266206] ? blk_mq_do_dispatch_ctx+0x570/0x570 [69832.267147] ? __switch_to+0x5f4/0xee0 [69832.267898] blk_mq_sched_dispatch_requests+0xdf/0x140 [69832.268946] __blk_mq_run_hw_queue+0xc0/0x270 [69832.269840] blk_mq_run_work_fn+0x51/0x60 [69832.278170] process_one_work+0x6d4/0xfe0 [69832.278984] worker_thread+0x91/0xc80 [69832.279726] ? __kthread_parkme+0xb0/0x110 [69832.280554] ? process_one_work+0xfe0/0xfe0 [69832.281414] kthread+0x32d/0x3f0 [69832.282082] ? kthread_park+0x170/0x170 [69832.282849] ret_from_fork+0x1f/0x30 [69832.283573] [69832.283886] Allocated by task 7725: [69832.284599] kasan_save_stack+0x19/0x40 [69832.285385] __kasan_kmalloc.constprop.2+0xc1/0xd0 [69832.286350] kmem_cache_alloc_node+0x13f/0x460 [69832.287237] bfq_get_queue+0x3d4/0x1140 [69832.287993] bfq_get_bfqq_handle_split+0x103/0x510 [69832.289015] bfq_init_rq+0x337/0x2d50 [69832.289749] bfq_insert_requests+0x304/0x4e10 [69832.290634] blk_mq_sched_insert_requests+0x13e/0x390 [69832.291629] blk_mq_flush_plug_list+0x4b4/0x760 [69832.292538] blk_flush_plug_list+0x2c5/0x480 [69832.293392] io_schedule_prepare+0xb2/0xd0 [69832.294209] io_schedule_timeout+0x13/0x80 [69832.295014] wait_for_common_io.constprop.1+0x13c/0x270 [69832.296137] submit_bio_wait+0x103/0x1a0 [69832.296932] blkdev_issue_discard+0xe6/0x160 [69832.297794] blk_ioctl_discard+0x219/0x290 [69832.298614] blkdev_common_ioctl+0x50a/0x1750 [69832.304715] blkdev_ioctl+0x470/0x600 [69832.305474] block_ioctl+0xde/0x120 [69832.306232] vfs_ioctl+0x6c/0xc0 [69832.306877] __se_sys_ioctl+0x90/0xa0 [69832.307629] do_syscall_64+0x2d/0x40 [69832.308362] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [69832.309382] [69832.309701] Freed by task 155: [69832.310328] kasan_save_stack+0x19/0x40 [69832.311121] kasan_set_track+0x1c/0x30 [69832.311868] kasan_set_free_info+0x1b/0x30 [69832.312699] __kasan_slab_free+0x111/0x160 [69832.313524] kmem_cache_free+0x94/0x460 [69832.314367] bfq_put_queue+0x582/0x940 [69832.315112] __bfq_bfqd_reset_in_service+0x166/0x1d0 [69832.317275] bfq_bfqq_expire+0xb27/0x2440 [69832.318084] bfq_dispatch_request+0x697/0x44b0 [69832.318991] __blk_mq_do_dispatch_sched+0x52f/0x830 [69832.319984] __blk_mq_sched_dispatch_requests+0x398/0x4f0 [69832.321087] blk_mq_sched_dispatch_requests+0xdf/0x140 [69832.322225] __blk_mq_run_hw_queue+0xc0/0x270 [69832.323114] blk_mq_run_work_fn+0x51/0x60 [69832.323942] process_one_work+0x6d4/0xfe0 [69832.324772] worker_thread+0x91/0xc80 [69832.325518] kthread+0x32d/0x3f0 [69832.326205] ret_from_fork+0x1f/0x30 [69832.326932] [69832.338297] The buggy address belongs to the object at ffff88802622b968 [69832.338297] which belongs to the cache bfq_queue of size 512 [69832.340766] The buggy address is located 288 bytes inside of [69832.340766] 512-byte region [ffff88802622b968, ffff88802622bb68) [69832.343091] The buggy address belongs to the page: [69832.344097] page:ffffea0000988a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88802622a528 pfn:0x26228 [69832.346214] head:ffffea0000988a00 order:2 compound_mapcount:0 compound_pincount:0 [69832.347719] flags: 0x1fffff80010200(slab|head) [69832.348625] raw: 001fffff80010200 ffffea0000dbac08 ffff888017a57650 ffff8880179fe840 [69832.354972] raw: ffff88802622a528 0000000000120008 00000001ffffffff 0000000000000000 [69832.356547] page dumped because: kasan: bad access detected [69832.357652] [69832.357970] Memory state around the buggy address: [69832.358926] ffff88802622b980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [69832.360358] ffff88802622ba00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [69832.361810] >ffff88802622ba80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [69832.363273] ^ [69832.363975] ffff88802622bb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc [69832.375960] ffff88802622bb80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [69832.377405] ==================================================================
In bfq_dispatch_requestfunction, it may have function call:
bfq_dispatch_request __bfq_dispatch_request bfq_select_queue bfq_bfqq_expire __bfq_bfqd_reset_in_service bfq_put_queue kmem_cache_free In this function call, in_serv_queue has beed expired and meet the conditions to free. In the function bfq_dispatch_request, the address of in_serv_queue pointing to has been released. For getting the value of idle_timer_disabled, it will get flags value from the address which in_serv_queue pointing to, then the problem of use-after-free happens;
Fix the problem by check in_serv_queue == bfqd->in_service_queue, to get the value of idle_timer_disabled if in_serve_queue is equel to bfqd->in_service_queue. If the space of in_serv_queue pointing has been released, this judge will aviod use-after-free problem. And if in_serv_queue may be expired or finished, the idle_timer_disabled will be false which would not give effects to bfq_update_dispatch_stats.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Zhang Wensheng zhangwensheng5@huawei.com Link: https://lore.kernel.org/r/20220303070334.3020168-1-zhangwensheng5@huawei.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/bfq-iosched.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 8d95bf7765b1..138541996dd5 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4799,7 +4799,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct request *rq; struct bfq_queue *in_serv_queue; - bool waiting_rq, idle_timer_disabled; + bool waiting_rq, idle_timer_disabled = false;
spin_lock_irq(&bfqd->lock);
@@ -4807,14 +4807,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
rq = __bfq_dispatch_request(hctx); - - idle_timer_disabled = - waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + if (in_serv_queue == bfqd->in_service_queue) { + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + }
spin_unlock_irq(&bfqd->lock); - - bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, - idle_timer_disabled); + bfq_update_dispatch_stats(hctx->queue, rq, + idle_timer_disabled ? in_serv_queue : NULL, + idle_timer_disabled);
return rq; }
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
[ Upstream commit 0c9992315e738e7d6e927ef36839a466b080dba6 ]
ACPICA commit b1c3656ef4950098e530be68d4b589584f06cddc
Prevent acpi_ns_walk_namespace() from crashing when called with start_node equal to ACPI_ROOT_OBJECT if the Namespace has not been instantiated yet and acpi_gbl_root_node is NULL.
For instance, this can happen if the kernel is run with "acpi=off" in the command line.
Link: https://github.com/acpica/acpica/commit/b1c3656ef4950098e530be68d4b589584f06... Link: https://lore.kernel.org/linux-acpi/CAJZ5v0hJWW_vZ3wwajE7xT38aWjY7cZyvqMJpXHz... Reported-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/acpica/nswalk.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c index b7f3e8603ad8..901fa5ca284d 100644 --- a/drivers/acpi/acpica/nswalk.c +++ b/drivers/acpi/acpica/nswalk.c @@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type,
if (start_node == ACPI_ROOT_OBJECT) { start_node = acpi_gbl_root_node; + if (!start_node) { + return_ACPI_STATUS(AE_NO_NAMESPACE); + } }
/* Null child means "get first node" */
From: Paul Menzel pmenzel@molgen.mpg.de
[ Upstream commit 633174a7046ec3b4572bec24ef98e6ee89bce14b ]
Buidling raid6test on Ubuntu 21.10 (ppc64le) with GNU Make 4.3 shows the errors below:
$ cd lib/raid6/test/ $ make <stdin>:1:1: error: stray ‘\’ in program <stdin>:1:2: error: stray ‘#’ in program <stdin>:1:11: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ \ before ‘<’ token
[...]
The errors come from the HAS_ALTIVEC test, which fails, and the POWER optimized versions are not built. That’s also reason nobody noticed on the other architectures.
GNU Make 4.3 does not remove the backslash anymore. From the 4.3 release announcment:
- WARNING: Backward-incompatibility! Number signs (#) appearing inside a macro reference or function invocation no longer introduce comments and should not be escaped with backslashes: thus a call such as: foo := $(shell echo '#') is legal. Previously the number sign needed to be escaped, for example: foo := $(shell echo '#') Now this latter will resolve to "#". If you want to write makefiles portable to both versions, assign the number sign to a variable: H := # foo := $(shell echo '$H') This was claimed to be fixed in 3.81, but wasn't, for some reason. To detect this change search for 'nocomment' in the .FEATURES variable.
So, do the same as commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd files for future Make") and commit 929bef467771 ("bpf: Use $(pound) instead of # in Makefiles") and define and use a $(pound) variable.
Reference for the change in make: https://git.savannah.gnu.org/cgit/make.git/commit/?id=c6966b323811c37acedff0...
Cc: Matt Brown matthew.brown.dev@gmail.com Signed-off-by: Paul Menzel pmenzel@molgen.mpg.de Signed-off-by: Song Liu song@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- lib/raid6/test/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index a4c7cd74cff5..4fb7700a741b 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -4,6 +4,8 @@ # from userspace. #
+pound := # + CC = gcc OPTFLAGS = -O2 # Adjust as desired CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) @@ -42,7 +44,7 @@ else ifeq ($(HAS_NEON),yes) OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else - HAS_ALTIVEC := $(shell printf '#include <altivec.h>\nvector int a;\n' |\ + HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\ gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -I../../../arch/powerpc/include
From: Paolo Valente paolo.valente@linaro.org
[ Upstream commit 15729ff8143f8135b03988a100a19e66d7cb7ecd ]
A crash [1] happened to be triggered in conjunction with commit 2d52c58b9c9b ("block, bfq: honor already-setup queue merges"). The latter was then reverted by commit ebc69e897e17 ("Revert "block, bfq: honor already-setup queue merges""). Yet, the reverted commit was not the one introducing the bug. In fact, it actually triggered a UAF introduced by a different commit, and now fixed by commit d29bd41428cf ("block, bfq: reset last_bfqq_created on group change").
So, there is no point in keeping commit 2d52c58b9c9b ("block, bfq: honor already-setup queue merges") out. This commit restores it.
[1] https://bugzilla.kernel.org/show_bug.cgi?id=214503
Reported-by: Holger Hoffstätte holger@applied-asynchrony.com Signed-off-by: Paolo Valente paolo.valente@linaro.org Link: https://lore.kernel.org/r/20211125181510.15004-1-paolo.valente@linaro.org Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/bfq-iosched.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 138541996dd5..de2cd4bd602f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2526,6 +2526,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) * are likely to increase the throughput. */ bfqq->new_bfqq = new_bfqq; + /* + * The above assignment schedules the following redirections: + * each time some I/O for bfqq arrives, the process that + * generated that I/O is disassociated from bfqq and + * associated with new_bfqq. Here we increases new_bfqq->ref + * in advance, adding the number of processes that are + * expected to be associated with new_bfqq as they happen to + * issue I/O. + */ new_bfqq->ref += process_refs; return new_bfqq; } @@ -2585,6 +2594,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq;
+ /* if a merge has already been setup, then proceed with that first */ + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + /* * Do not perform queue merging if the device is non * rotational and performs internal queueing. In fact, such a @@ -2639,9 +2652,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_too_late_for_merging(bfqq)) return NULL;
- if (bfqq->new_bfqq) - return bfqq->new_bfqq; - if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL;
From: Darren Hart darren@os.amperecomputing.com
[ Upstream commit 3f8dec116210ca649163574ed5f8df1e3b837d07 ]
Platforms with large BERT table data can trigger soft lockup errors while attempting to print the entire BERT table data to the console at boot:
watchdog: BUG: soft lockup - CPU#160 stuck for 23s! [swapper/0:1]
Observed on Ampere Altra systems with a single BERT record of ~250KB.
The original bert driver appears to have assumed relatively small table data. Since it is impractical to reassemble large table data from interwoven console messages, and the table data is available in
/sys/firmware/acpi/tables/data/BERT
limit the size for tables printed to the console to 1024 (for no reason other than it seemed like a good place to kick off the discussion, would appreciate feedback from existing users in terms of what size would maintain their current usage model).
Alternatively, we could make printing a CONFIG option, use the bert_disable boot arg (or something similar), or use a debug log level. However, all those solutions require extra steps or change the existing behavior for small table data. Limiting the size preserves existing behavior on existing platforms with small table data, and eliminates the soft lockups for platforms with large table data, while still making it available.
Signed-off-by: Darren Hart darren@os.amperecomputing.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/apei/bert.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 19e50fcbf4d6..ad8ab3f12cf3 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -29,6 +29,7 @@
#undef pr_fmt #define pr_fmt(fmt) "BERT: " fmt +#define ACPI_BERT_PRINT_MAX_LEN 1024
static int bert_disable;
@@ -58,8 +59,11 @@ static void __init bert_print_all(struct acpi_bert_region *region, }
pr_info_once("Error records from previous boot:\n"); - - cper_estatus_print(KERN_INFO HW_ERR, estatus); + if (region_len < ACPI_BERT_PRINT_MAX_LEN) + cper_estatus_print(KERN_INFO HW_ERR, estatus); + else + pr_info_once("Max print length exceeded, table data is available at:\n" + "/sys/firmware/acpi/tables/data/BERT");
/* * Because the boot error source is "one-time polled" type,
From: Dmitry Baryshkov dmitry.baryshkov@linaro.org
[ Upstream commit 524bb1da785a7ae43dd413cd392b5071c6c367f8 ]
The function device_pm_check_callbacks() can be called under the spin lock (in the reported case it happens from genpd_add_device() -> dev_pm_domain_set(), when the genpd uses spinlocks rather than mutexes.
However this function uncoditionally uses spin_lock_irq() / spin_unlock_irq(), thus not preserving the CPU flags. Use the irqsave/irqrestore instead.
The backtrace for the reference: [ 2.752010] ------------[ cut here ]------------ [ 2.756769] raw_local_irq_restore() called with IRQs enabled [ 2.762596] WARNING: CPU: 4 PID: 1 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x34/0x50 [ 2.772338] Modules linked in: [ 2.775487] CPU: 4 PID: 1 Comm: swapper/0 Tainted: G S 5.17.0-rc6-00384-ge330d0d82eff-dirty #684 [ 2.781384] Freeing initrd memory: 46024K [ 2.785839] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.785841] pc : warn_bogus_irq_restore+0x34/0x50 [ 2.785844] lr : warn_bogus_irq_restore+0x34/0x50 [ 2.785846] sp : ffff80000805b7d0 [ 2.785847] x29: ffff80000805b7d0 x28: 0000000000000000 x27: 0000000000000002 [ 2.785850] x26: ffffd40e80930b18 x25: ffff7ee2329192b8 x24: ffff7edfc9f60800 [ 2.785853] x23: ffffd40e80930b18 x22: ffffd40e80930d30 x21: ffff7edfc0dffa00 [ 2.785856] x20: ffff7edfc09e3768 x19: 0000000000000000 x18: ffffffffffffffff [ 2.845775] x17: 6572206f74206465 x16: 6c696166203a3030 x15: ffff80008805b4f7 [ 2.853108] x14: 0000000000000000 x13: ffffd40e809550b0 x12: 00000000000003d8 [ 2.860441] x11: 0000000000000148 x10: ffffd40e809550b0 x9 : ffffd40e809550b0 [ 2.867774] x8 : 00000000ffffefff x7 : ffffd40e809ad0b0 x6 : ffffd40e809ad0b0 [ 2.875107] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000 [ 2.882440] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff7edfc03a8000 [ 2.889774] Call trace: [ 2.892290] warn_bogus_irq_restore+0x34/0x50 [ 2.896770] _raw_spin_unlock_irqrestore+0x94/0xa0 [ 2.901690] genpd_unlock_spin+0x20/0x30 [ 2.905724] genpd_add_device+0x100/0x2d0 [ 2.909850] __genpd_dev_pm_attach+0xa8/0x23c [ 2.914329] genpd_dev_pm_attach_by_id+0xc4/0x190 [ 2.919167] genpd_dev_pm_attach_by_name+0x3c/0xd0 [ 2.924086] dev_pm_domain_attach_by_name+0x24/0x30 [ 2.929102] psci_dt_attach_cpu+0x24/0x90 [ 2.933230] psci_cpuidle_probe+0x2d4/0x46c [ 2.937534] platform_probe+0x68/0xe0 [ 2.941304] really_probe.part.0+0x9c/0x2fc [ 2.945605] __driver_probe_device+0x98/0x144 [ 2.950085] driver_probe_device+0x44/0x15c [ 2.954385] __device_attach_driver+0xb8/0x120 [ 2.958950] bus_for_each_drv+0x78/0xd0 [ 2.962896] __device_attach+0xd8/0x180 [ 2.966843] device_initial_probe+0x14/0x20 [ 2.971144] bus_probe_device+0x9c/0xa4 [ 2.975092] device_add+0x380/0x88c [ 2.978679] platform_device_add+0x114/0x234 [ 2.983067] platform_device_register_full+0x100/0x190 [ 2.988344] psci_idle_init+0x6c/0xb0 [ 2.992113] do_one_initcall+0x74/0x3a0 [ 2.996060] kernel_init_freeable+0x2fc/0x384 [ 3.000543] kernel_init+0x28/0x130 [ 3.004132] ret_from_fork+0x10/0x20 [ 3.007817] irq event stamp: 319826 [ 3.011404] hardirqs last enabled at (319825): [<ffffd40e7eda0268>] __up_console_sem+0x78/0x84 [ 3.020332] hardirqs last disabled at (319826): [<ffffd40e7fd6d9d8>] el1_dbg+0x24/0x8c [ 3.028458] softirqs last enabled at (318312): [<ffffd40e7ec90410>] _stext+0x410/0x588 [ 3.036678] softirqs last disabled at (318299): [<ffffd40e7ed1bf68>] __irq_exit_rcu+0x158/0x174 [ 3.045607] ---[ end trace 0000000000000000 ]---
Signed-off-by: Dmitry Baryshkov dmitry.baryshkov@linaro.org Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/power/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 4167e2aef397..1dbaaddf540e 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1994,7 +1994,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops)
void device_pm_check_callbacks(struct device *dev) { - spin_lock_irq(&dev->power.lock); + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && @@ -2003,7 +2005,7 @@ void device_pm_check_callbacks(struct device *dev) (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); }
bool dev_pm_skip_suspend(struct device *dev)
From: John David Anglin dave.anglin@bell.net
[ Upstream commit e00b0a2ab8ec019c344e53bfc76e31c18bb587b7 ]
Currently, the parisc kernel does not fully support non-access TLB fault handling for probe instructions. In the fast path, we set the target register to zero if it is not a shadowed register. The slow path is not implemented, so we call do_page_fault. The architecture indicates that non-access faults should not cause a page fault from disk.
This change adds to code to provide non-access fault support for probe instructions. It also modifies the handling of faults on userspace so that if the address lies in a valid VMA and the access type matches that for the VMA, the probe target register is set to one. Otherwise, the target register is set to zero.
This was done to make probe instructions more useful for userspace. Probe instructions are not very useful if they set the target register to zero whenever a page is not present in memory. Nominally, the purpose of the probe instruction is determine whether read or write access to a given address is allowed.
This fixes a problem in function pointer comparison noticed in the glibc testsuite (stdio-common/tst-vfprintf-user-type). The same problem is likely in glibc (_dl_lookup_address).
V2 adds flush and lpa instruction support to handle_nadtlb_fault.
Signed-off-by: John David Anglin dave.anglin@bell.net Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/parisc/include/asm/traps.h | 1 + arch/parisc/kernel/traps.c | 2 + arch/parisc/mm/fault.c | 89 +++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+)
diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 8ecc1f0c0483..d0e090a2c000 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -17,6 +17,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err); const char *trap_name(unsigned long code); void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address); +int handle_nadtlb_fault(struct pt_regs *regs); #endif
#endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 269b737d2629..bce47e0fb692 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -661,6 +661,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) by hand. Technically we need to emulate: fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */ + if (code == 17 && handle_nadtlb_fault(regs)) + return; fault_address = regs->ior; fault_space = regs->isr; break; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 716960f5d92e..5faa3cff4738 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -424,3 +424,92 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, goto no_context; pagefault_out_of_memory(); } + +/* Handle non-access data TLB miss faults. + * + * For probe instructions, accesses to userspace are considered allowed + * if they lie in a valid VMA and the access type matches. We are not + * allowed to handle MM faults here so there may be situations where an + * actual access would fail even though a probe was successful. + */ +int +handle_nadtlb_fault(struct pt_regs *regs) +{ + unsigned long insn = regs->iir; + int breg, treg, xreg, val = 0; + struct vm_area_struct *vma, *prev_vma; + struct task_struct *tsk; + struct mm_struct *mm; + unsigned long address; + unsigned long acc_type; + + switch (insn & 0x380) { + case 0x280: + /* FDC instruction */ + fallthrough; + case 0x380: + /* PDC and FIC instructions */ + if (printk_ratelimit()) { + pr_warn("BUG: nullifying cache flush/purge instruction\n"); + show_regs(regs); + } + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + regs->gr[0] |= PSW_N; + return 1; + + case 0x180: + /* PROBE instruction */ + treg = insn & 0x1f; + if (regs->isr) { + tsk = current; + mm = tsk->mm; + if (mm) { + /* Search for VMA */ + address = regs->ior; + mmap_read_lock(mm); + vma = find_vma_prev(mm, address, &prev_vma); + mmap_read_unlock(mm); + + /* + * Check if access to the VMA is okay. + * We don't allow for stack expansion. + */ + acc_type = (insn & 0x40) ? VM_WRITE : VM_READ; + if (vma + && address >= vma->vm_start + && (vma->vm_flags & acc_type) == acc_type) + val = 1; + } + } + if (treg) + regs->gr[treg] = val; + regs->gr[0] |= PSW_N; + return 1; + + case 0x300: + /* LPA instruction */ + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + treg = insn & 0x1f; + if (treg) + regs->gr[treg] = 0; + regs->gr[0] |= PSW_N; + return 1; + + default: + break; + } + + return 0; +}
From: Chris Leech cleech@redhat.com
[ Upstream commit 841aee4d75f18fdfb53935080b03de0c65e9b92c ]
Put NVMe/TCP sockets in their own class to avoid some lockdep warnings. Sockets created by nvme-tcp are not exposed to user-space, and will not trigger certain code paths that the general socket API exposes.
Lockdep complains about a circular dependency between the socket and filesystem locks, because setsockopt can trigger a page fault with a socket lock held, but nvme-tcp sends requests on the socket while file system locks are held.
====================================================== WARNING: possible circular locking dependency detected 5.15.0-rc3 #1 Not tainted ------------------------------------------------------ fio/1496 is trying to acquire lock: (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendpage+0x23/0x80
but task is already holding lock: (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs]
which lock already depends on the new lock.
other info that might help us debug this:
chain exists of: sk_lock-AF_INET --> sb_internal --> &xfs_dir_ilock_class/5
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(&xfs_dir_ilock_class/5); lock(sb_internal); lock(&xfs_dir_ilock_class/5); lock(sk_lock-AF_INET);
*** DEADLOCK ***
6 locks held by fio/1496: #0: (sb_writers#13){.+.+}-{0:0}, at: path_openat+0x9fc/0xa20 #1: (&inode->i_sb->s_type->i_mutex_dir_key){++++}-{3:3}, at: path_openat+0x296/0xa20 #2: (sb_internal){.+.+}-{0:0}, at: xfs_trans_alloc_icreate+0x41/0xd0 [xfs] #3: (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs] #4: (hctx->srcu){....}-{0:0}, at: hctx_lock+0x51/0xd0 #5: (&queue->send_mutex){+.+.}-{3:3}, at: nvme_tcp_queue_rq+0x33e/0x380 [nvme_tcp]
This annotation lets lockdep analyze nvme-tcp controlled sockets independently of what the user-space sockets API does.
Link: https://lore.kernel.org/linux-nvme/CAHj4cs9MDYLJ+q+2_GXUK9HxFizv2pxUryUR0toX...
Signed-off-by: Chris Leech cleech@redhat.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/tcp.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 6105894a218a..7e3932033707 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -30,6 +30,44 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
+#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* lockdep can detect a circular dependency of the form + * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock + * because dependencies are tracked for both nvme-tcp and user contexts. Using + * a separate class prevents lockdep from conflating nvme-tcp socket use with + * user-space socket API use. + */ +static struct lock_class_key nvme_tcp_sk_key[2]; +static struct lock_class_key nvme_tcp_slock_key[2]; + +static void nvme_tcp_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", + &nvme_tcp_slock_key[0], + "sk_lock-AF_INET-NVME", + &nvme_tcp_sk_key[0]); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", + &nvme_tcp_slock_key[1], + "sk_lock-AF_INET6-NVME", + &nvme_tcp_sk_key[1]); + break; + default: + WARN_ON_ONCE(1); + } +} +#else +static void nvme_tcp_reclassify_socket(struct socket *sock) { } +#endif + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1422,6 +1460,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_destroy_mutex; }
+ nvme_tcp_reclassify_socket(queue->sock); + /* Single syn retry */ tcp_sock_set_syncnt(queue->sock->sk, 1);
From: Minghao Chi chi.minghao@zte.com.cn
[ Upstream commit c9839acfcbe20ce43d363c2a9d0772472d9921c0 ]
Use of_device_get_match_data() to simplify the code.
Reported-by: Zeal Robot zealci@zte.com.cn Signed-off-by: Minghao Chi chi.minghao@zte.com.cn Link: https://lore.kernel.org/r/20220315023138.2118293-1-chi.minghao@zte.com.cn Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi-tegra20-slink.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 669fc4286231..9e2b812b9025 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1006,14 +1006,8 @@ static int tegra_slink_probe(struct platform_device *pdev) struct resource *r; int ret, spi_irq; const struct tegra_slink_chip_data *cdata = NULL; - const struct of_device_id *match;
- match = of_match_device(tegra_slink_of_match, &pdev->dev); - if (!match) { - dev_err(&pdev->dev, "Error: No device match found\n"); - return -ENODEV; - } - cdata = match->data; + cdata = of_device_get_match_data(&pdev->dev);
master = spi_alloc_master(&pdev->dev, sizeof(*tspi)); if (!master) {
linux-stable-mirror@lists.linaro.org