From: Matti Vaittinen mazziesaccount@gmail.com
[ Upstream commit 7ab681ddedd4b6dd2b047c74af95221c5f827e1d ]
The regulator IRQ helper requires caller to provide pointer to IRQ name which is kept in memory by caller. All other data passed to the helper in the regulator_irq_desc structure is copied. This can cause some confusion and unnecessary complexity.
Make the regulator_irq_helper() to copy also the provided IRQ name information so caller can discard the name after the call to regulator_irq_helper() completes.
Signed-off-by: Matti Vaittinen mazziesaccount@gmail.com Link: https://msgid.link/r/ZhJMuUYwaZbBXFGP@drtxq0yyyyyyyyyyyyydy-3.rev.dnainterne... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/regulator/irq_helpers.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/regulator/irq_helpers.c b/drivers/regulator/irq_helpers.c index 5227644355750..a44a0b30a6516 100644 --- a/drivers/regulator/irq_helpers.c +++ b/drivers/regulator/irq_helpers.c @@ -350,6 +350,9 @@ void *regulator_irq_helper(struct device *dev,
h->irq = irq; h->desc = *d; + h->desc.name = devm_kstrdup(dev, d->name, GFP_KERNEL); + if (!h->desc.name) + return ERR_PTR(-ENOMEM);
ret = init_rdev_state(dev, h, rdev, common_errs, per_rdev_errs, rdev_amount);
From: Derek Fang derek.fang@realtek.com
[ Upstream commit 103abab975087e1f01b76fcb54c91dbb65dbc249 ]
The codec leaves tie combo jack's sleeve/ring2 to floating status default. It would cause electric noise while connecting the active speaker jack during boot or shutdown. This patch requests a gpio to control the additional jack circuit to tie the contacts to the ground or floating.
Signed-off-by: Derek Fang derek.fang@realtek.com
Link: https://msgid.link/r/20240408091057.14165-1-derek.fang@realtek.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/codecs/rt5645.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 2cc3d814bab41..5a44f5201515b 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -441,6 +441,7 @@ struct rt5645_priv { struct regmap *regmap; struct i2c_client *i2c; struct gpio_desc *gpiod_hp_det; + struct gpio_desc *gpiod_cbj_sleeve; struct snd_soc_jack *hp_jack; struct snd_soc_jack *mic_jack; struct snd_soc_jack *btn_jack; @@ -3179,6 +3180,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse regmap_update_bits(rt5645->regmap, RT5645_IN1_CTRL2, RT5645_CBJ_MN_JD, 0);
+ if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 1); + msleep(600); regmap_read(rt5645->regmap, RT5645_IN1_CTRL3, &val); val &= 0x7; @@ -3195,6 +3199,8 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse snd_soc_dapm_disable_pin(dapm, "Mic Det Power"); snd_soc_dapm_sync(dapm); rt5645->jack_type = SND_JACK_HEADPHONE; + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, @@ -3220,6 +3226,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); }
return rt5645->jack_type; @@ -3933,6 +3942,16 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, return ret; }
+ rt5645->gpiod_cbj_sleeve = devm_gpiod_get_optional(&i2c->dev, "cbj-sleeve", + GPIOD_OUT_LOW); + + if (IS_ERR(rt5645->gpiod_cbj_sleeve)) { + ret = PTR_ERR(rt5645->gpiod_cbj_sleeve); + dev_info(&i2c->dev, "failed to initialize gpiod, ret=%d\n", ret); + if (ret != -ENOENT) + return ret; + } + for (i = 0; i < ARRAY_SIZE(rt5645->supplies); i++) rt5645->supplies[i].supply = rt5645_supply_names[i];
@@ -4176,6 +4195,9 @@ static int rt5645_i2c_remove(struct i2c_client *i2c) cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work);
+ if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); + regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies);
return 0; @@ -4193,6 +4215,9 @@ static void rt5645_i2c_shutdown(struct i2c_client *i2c) 0); msleep(20); regmap_write(rt5645->regmap, RT5645_RESET, 0); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); }
static struct i2c_driver rt5645_i2c_driver = {
From: Derek Fang derek.fang@realtek.com
[ Upstream commit 306b38e3fa727d22454a148a364123709e356600 ]
Add an optional gpio property to control external CBJ circuits to avoid some electric noise caused by sleeve/ring2 contacts floating.
Signed-off-by: Derek Fang derek.fang@realtek.com
Link: https://msgid.link/r/20240408091057.14165-2-derek.fang@realtek.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- Documentation/devicetree/bindings/sound/rt5645.txt | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/Documentation/devicetree/bindings/sound/rt5645.txt b/Documentation/devicetree/bindings/sound/rt5645.txt index 41a62fd2ae1ff..c1fa379f5f3ea 100644 --- a/Documentation/devicetree/bindings/sound/rt5645.txt +++ b/Documentation/devicetree/bindings/sound/rt5645.txt @@ -20,6 +20,11 @@ Optional properties: a GPIO spec for the external headphone detect pin. If jd-mode = 0, we will get the JD status by getting the value of hp-detect-gpios.
+- cbj-sleeve-gpios: + a GPIO spec to control the external combo jack circuit to tie the sleeve/ring2 + contacts to the ground or floating. It could avoid some electric noise from the + active speaker jacks. + - realtek,in2-differential Boolean. Indicate MIC2 input are differential, rather than single-ended.
@@ -68,6 +73,7 @@ codec: rt5650@1a { compatible = "realtek,rt5650"; reg = <0x1a>; hp-detect-gpios = <&gpio 19 0>; + cbj-sleeve-gpios = <&gpio 20 0>; interrupt-parent = <&gpio>; interrupts = <7 IRQ_TYPE_EDGE_FALLING>; realtek,dmic-en = "true";
From: Krzysztof Kozlowski krzk@kernel.org
[ Upstream commit 68adb581a39ae63a0ed082c47f01fbbe515efa0e ]
Add MODULE_DEVICE_TABLE(), so the module could be properly autoloaded based on the alias from of_device_id table.
Signed-off-by: Krzysztof Kozlowski krzk@kernel.org Reviewed-by: Konrad Dybcio konrad.dybcio@linaro.org Link: https://msgid.link/r/20240410172615.255424-2-krzk@kernel.org Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/regulator/vqmmc-ipq4019-regulator.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/regulator/vqmmc-ipq4019-regulator.c b/drivers/regulator/vqmmc-ipq4019-regulator.c index 6d5ae25d08d1e..e2a28788d8a22 100644 --- a/drivers/regulator/vqmmc-ipq4019-regulator.c +++ b/drivers/regulator/vqmmc-ipq4019-regulator.c @@ -86,6 +86,7 @@ static const struct of_device_id regulator_ipq4019_of_match[] = { { .compatible = "qcom,vqmmc-ipq4019-regulator", }, {}, }; +MODULE_DEVICE_TABLE(of, regulator_ipq4019_of_match);
static struct platform_driver ipq4019_regulator_driver = { .probe = ipq4019_regulator_probe,
From: Jack Yu jack.yu@realtek.com
[ Upstream commit cebfbc89ae2552dbb58cd9b8206a5c8e0e6301e9 ]
Add vendor clear control register in readable register's callback function. This prevents an access failure reported in Intel CI tests.
Signed-off-by: Jack Yu jack.yu@realtek.com Closes: https://github.com/thesofproject/linux/issues/4860 Tested-by: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com Link: https://lore.kernel.org/r/6a103ce9134d49d8b3941172c87a7bd4@realtek.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/codecs/rt715-sdw.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index b047bf87a100c..e269026942e17 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -111,6 +111,7 @@ static bool rt715_readable_register(struct device *dev, unsigned int reg) case 0x839d: case 0x83a7: case 0x83a9: + case 0x752001: case 0x752039: return true; default:
From: Jack Yu jack.yu@realtek.com
[ Upstream commit bda16500dd0b05e2e047093b36cbe0873c95aeae ]
Volume step (dB/step) modification to fix format error which shown in amixer control.
Signed-off-by: Jack Yu jack.yu@realtek.com Link: https://lore.kernel.org/r/b1f546ad16dc4c7abb7daa7396e8345c@realtek.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/codecs/rt715-sdca.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c index bfa536bd71960..7c8d6a012f610 100644 --- a/sound/soc/codecs/rt715-sdca.c +++ b/sound/soc/codecs/rt715-sdca.c @@ -315,7 +315,7 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol, return 0; }
-static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -17625, 375, 0); +static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1725, 75, 0); static const DECLARE_TLV_DB_SCALE(mic_vol_tlv, 0, 1000, 0);
static int rt715_sdca_get_volsw(struct snd_kcontrol *kcontrol, @@ -476,7 +476,7 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC7_27_VOL, RT715_SDCA_FU_VOL_CTRL, CH_02), - 0x2f, 0x7f, 0, + 0x2f, 0x3f, 0, rt715_sdca_set_amp_gain_get, rt715_sdca_set_amp_gain_put, in_vol_tlv), RT715_SDCA_EXT_TLV("FU02 Capture Volume", @@ -484,13 +484,13 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), RT715_SDCA_EXT_TLV("FU06 Capture Volume", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC10_11_VOL, RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), /* MIC Boost Control */ RT715_SDCA_BOOST_EXT_TLV("FU0E Boost", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_DMIC_GAIN_EN,
From: Zqiang qiang.zhang1211@gmail.com
[ Upstream commit 1dd1eff161bd55968d3d46bc36def62d71fb4785 ]
Currently, the condition "__this_cpu_read(ksoftirqd) == current" is used to invoke rcu_softirq_qs() in ksoftirqd tasks context for non-RT kernels.
This works correctly as long as the context is actually task context but this condition is wrong when:
- the current task is ksoftirqd - the task is interrupted in a RCU read side critical section - __do_softirq() is invoked on return from interrupt
Syzkaller triggered the following scenario:
-> finish_task_switch() -> put_task_struct_rcu_user() -> call_rcu(&task->rcu, delayed_put_task_struct) -> __kasan_record_aux_stack() -> pfn_valid() -> rcu_read_lock_sched() <interrupt> __irq_exit_rcu() -> __do_softirq)() -> if (!IS_ENABLED(CONFIG_PREEMPT_RT) && __this_cpu_read(ksoftirqd) == current) -> rcu_softirq_qs() -> RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map))
The rcu quiescent state is reported in the rcu-read critical section, so the lockdep warning is triggered.
Fix this by splitting out the inner working of __do_softirq() into a helper function which takes an argument to distinguish between ksoftirqd task context and interrupted context and invoke it from the relevant call sites with the proper context information and use that for the conditional invocation of rcu_softirq_qs().
Reported-by: syzbot+dce04ed6d1438ad69656@syzkaller.appspotmail.com Suggested-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Zqiang qiang.zhang1211@gmail.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Link: https://lore.kernel.org/r/20240427102808.29356-1-qiang.zhang1211@gmail.com Link: https://lore.kernel.org/lkml/8f281a10-b85a-4586-9586-5bbc12dc784f@paulmck-la... Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/softirq.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/kernel/softirq.c b/kernel/softirq.c index 41f470929e991..dc60f0c66a25f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -512,7 +512,7 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif
-asmlinkage __visible void __softirq_entry __do_softirq(void) +static void handle_softirqs(bool ksirqd) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; @@ -567,8 +567,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending >>= softirq_bit; }
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && - __this_cpu_read(ksoftirqd) == current) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd) rcu_softirq_qs();
local_irq_disable(); @@ -588,6 +587,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) current_restore_flags(old_flags, PF_MEMALLOC); }
+asmlinkage __visible void __softirq_entry __do_softirq(void) +{ + handle_softirqs(false); +} + /** * irq_enter_rcu - Enter an interrupt context with RCU watching */ @@ -918,7 +922,7 @@ static void run_ksoftirqd(unsigned int cpu) * We can safely run softirq on inline stack, as we are not deep * in the task stack here. */ - __do_softirq(); + handle_softirqs(true); ksoftirqd_run_end(); cond_resched(); return;
From: Asbjørn Sloth Tønnesen ast@fiberby.net
[ Upstream commit e25714466abd9d96901b15efddf82c60a38abd86 ]
Explicitly set 'rc' (return code), before jumping to the unlock and return path.
By not having any code depend on that 'rc' remains at it's initial value of -EINVAL, then we can re-use 'rc' for the return code of function calls in subsequent patches.
Only compile tested.
Signed-off-by: Asbjørn Sloth Tønnesen ast@fiberby.net Reviewed-by: Simon Horman horms@kernel.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 3010833ddde33..76aa5934e985b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1868,8 +1868,8 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, struct flow_cls_offload *f) { struct qede_arfs_fltr_node *n; - int min_hlen, rc = -EINVAL; struct qede_arfs_tuple t; + int min_hlen, rc;
__qede_lock(edev);
@@ -1879,8 +1879,10 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, }
/* parse flower attribute and prepare filter */ - if (qede_parse_flow_attr(edev, proto, f->rule, &t)) + if (qede_parse_flow_attr(edev, proto, f->rule, &t)) { + rc = -EINVAL; goto unlock; + }
/* Validate profile mode and number of filters */ if ((edev->arfs->filter_count && edev->arfs->mode != t.mode) || @@ -1888,12 +1890,15 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, DP_NOTICE(edev, "Filter configuration invalidated, filter mode=0x%x, configured mode=0x%x, filter count=0x%x\n", t.mode, edev->arfs->mode, edev->arfs->filter_count); + rc = -EINVAL; goto unlock; }
/* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) + if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) { + rc = -EINVAL; goto unlock; + }
if (qede_flow_find_fltr(edev, &t)) { rc = -EEXIST;
From: Thanassis Avgerinos thanassis.avgerinos@gmail.com
[ Upstream commit 38762a0763c10c24a4915feee722d7aa6e73eb98 ]
Ensure that packet_buffer_get respects the user_length provided. If the length of the head packet exceeds the user_length, packet_buffer_get will now return 0 to signify to the user that no data were read and a larger buffer size is required. Helps prevent user space overflows.
Signed-off-by: Thanassis Avgerinos thanassis.avgerinos@gmail.com Signed-off-by: Takashi Sakamoto o-takashi@sakamocchi.jp Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/firewire/nosy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c index b0d671db178a8..ea31ac7ac1ca9 100644 --- a/drivers/firewire/nosy.c +++ b/drivers/firewire/nosy.c @@ -148,10 +148,12 @@ packet_buffer_get(struct client *client, char __user *data, size_t user_length) if (atomic_read(&buffer->size) == 0) return -ENODEV;
- /* FIXME: Check length <= user_length. */ + length = buffer->head->length; + + if (length > user_length) + return 0;
end = buffer->data + buffer->capacity; - length = buffer->head->length;
if (&buffer->head->data[length] < end) { if (copy_to_user(data, buffer->head->data, length))
From: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com
[ Upstream commit e8a6a5ad73acbafd98e8fd3f0cbf6e379771bb76 ]
The documentation for device_get_named_child_node() mentions this important point:
" The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. "
Add fwnode_handle_put() to avoid a leaked reference.
Signed-off-by: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com Link: https://lore.kernel.org/r/20240426153033.38500-1-pierre-louis.bossart@linux.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/codecs/da7219-aad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 4dc6eed6c18aa..99676c426f781 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -629,8 +629,10 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) return NULL;
aad_pdata = devm_kzalloc(dev, sizeof(*aad_pdata), GFP_KERNEL); - if (!aad_pdata) + if (!aad_pdata) { + fwnode_handle_put(aad_np); return NULL; + }
aad_pdata->irq = i2c->irq;
@@ -705,6 +707,8 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) else aad_pdata->adc_1bit_rpt = DA7219_AAD_ADC_1BIT_RPT_1;
+ fwnode_handle_put(aad_np); + return aad_pdata; }
From: Gabe Teeger gabe.teeger@amd.com
[ Upstream commit 9a35d205f466501dcfe5625ca313d944d0ac2d60 ]
New request from KMD/VBIOS in order to support new UMA carveout model. This fixes a null dereference from accessing Ctx->dc_bios->integrated_info while it was NULL.
DAL parses through the BIOS and extracts the necessary integrated_info but was missing a case for the new BIOS version 2.3.
Reviewed-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com Acked-by: Aurabindo Pillai aurabindo.pillai@amd.com Signed-off-by: Gabe Teeger gabe.teeger@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 228f098e5d88f..6bc8c6bee411e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2303,6 +2303,7 @@ static enum bp_result construct_integrated_info( result = get_integrated_info_v2_1(bp, info); break; case 2: + case 3: result = get_integrated_info_v2_2(bp, info); break; default:
From: Lancelot SIX lancelot.six@amd.com
[ Upstream commit f5b9053398e70a0c10aa9cb4dd5910ab6bc457c5 ]
There is a race condition when re-creating a kfd_process for a process. This has been observed when a process under the debugger executes exec(3). In this scenario: - The process executes exec. - This will eventually release the process's mm, which will cause the kfd_process object associated with the process to be freed (kfd_process_free_notifier decrements the reference count to the kfd_process to 0). This causes kfd_process_ref_release to enqueue kfd_process_wq_release to the kfd_process_wq. - The debugger receives the PTRACE_EVENT_EXEC notification, and tries to re-enable AMDGPU traps (KFD_IOC_DBG_TRAP_ENABLE). - When handling this request, KFD tries to re-create a kfd_process. This eventually calls kfd_create_process and kobject_init_and_add.
At this point the call to kobject_init_and_add can fail because the old kfd_process.kobj has not been freed yet by kfd_process_wq_release.
This patch proposes to avoid this race by making sure to drain kfd_process_wq before creating a new kfd_process object. This way, we know that any cleanup task is done executing when we reach kobject_init_and_add.
Signed-off-by: Lancelot SIX lancelot.six@amd.com Reviewed-by: Felix Kuehling felix.kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 21ec8a18cad29..7f69031f2b61a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -818,6 +818,14 @@ struct kfd_process *kfd_create_process(struct file *filep) if (process) { pr_debug("Process already found\n"); } else { + /* If the process just called exec(3), it is possible that the + * cleanup of the kfd_process (following the release of the mm + * of the old process image) is still in the cleanup work queue. + * Make sure to drain any job before trying to recreate any + * resource for this process. + */ + flush_workqueue(kfd_process_wq); + process = create_process(thread); if (IS_ERR(process)) goto out;
From: Linus Torvalds torvalds@linux-foundation.org
[ Upstream commit 02b670c1f88e78f42a6c5aee155c7b26960ca054 ]
The syzbot-reported stack trace from hell in this discussion thread actually has three nested page faults:
https://lore.kernel.org/r/000000000000d5f4fc0616e816d4@google.com
... and I think that's actually the important thing here:
- the first page fault is from user space, and triggers the vsyscall emulation.
- the second page fault is from __do_sys_gettimeofday(), and that should just have caused the exception that then sets the return value to -EFAULT
- the third nested page fault is due to _raw_spin_unlock_irqrestore() -> preempt_schedule() -> trace_sched_switch(), which then causes a BPF trace program to run, which does that bpf_probe_read_compat(), which causes that page fault under pagefault_disable().
It's quite the nasty backtrace, and there's a lot going on.
The problem is literally the vsyscall emulation, which sets
current->thread.sig_on_uaccess_err = 1;
and that causes the fixup_exception() code to send the signal *despite* the exception being caught.
And I think that is in fact completely bogus. It's completely bogus exactly because it sends that signal even when it *shouldn't* be sent - like for the BPF user mode trace gathering.
In other words, I think the whole "sig_on_uaccess_err" thing is entirely broken, because it makes any nested page-faults do all the wrong things.
Now, arguably, I don't think anybody should enable vsyscall emulation any more, but this test case clearly does.
I think we should just make the "send SIGSEGV" be something that the vsyscall emulation does on its own, not this broken per-thread state for something that isn't actually per thread.
The x86 page fault code actually tried to deal with the "incorrect nesting" by having that:
if (in_interrupt()) return;
which ignores the sig_on_uaccess_err case when it happens in interrupts, but as shown by this example, these nested page faults do not need to be about interrupts at all.
IOW, I think the only right thing is to remove that horrendously broken code.
The attached patch looks like the ObviouslyCorrect(tm) thing to do.
NOTE! This broken code goes back to this commit in 2011:
4fc3490114bb ("x86-64: Set siginfo and context on vsyscall emulation faults")
... and back then the reason was to get all the siginfo details right. Honestly, I do not for a moment believe that it's worth getting the siginfo details right here, but part of the commit says:
This fixes issues with UML when vsyscall=emulate.
... and so my patch to remove this garbage will probably break UML in this situation.
I do not believe that anybody should be running with vsyscall=emulate in 2024 in the first place, much less if you are doing things like UML. But let's see if somebody screams.
Reported-and-tested-by: syzbot+83e7f982ca045ab4405c@syzkaller.appspotmail.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Ingo Molnar mingo@kernel.org Tested-by: Jiri Olsa jolsa@kernel.org Acked-by: Andy Lutomirski luto@kernel.org Link: https://lore.kernel.org/r/CAHk-=wh9D6f7HUkDgZHKmDCHUQmp+Co89GP+b8+z+G56BKeyN... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/entry/vsyscall/vsyscall_64.c | 28 ++--------------------- arch/x86/include/asm/processor.h | 1 - arch/x86/mm/fault.c | 33 +-------------------------- 3 files changed, 3 insertions(+), 59 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index fd2ee9408e914..ba3172d5b3286 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
static bool write_ok_or_segv(unsigned long ptr, size_t size) { - /* - * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_err, this could go away. - */ - if (!access_ok((void __user *)ptr, size)) { struct thread_struct *thread = ¤t->thread;
@@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) bool emulate_vsyscall(unsigned long error_code, struct pt_regs *regs, unsigned long address) { - struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_err; long ret; unsigned long orig_dx;
@@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code, goto sigsegv; }
- tsk = current; - /* * Check for access_ok violations and find the syscall nr. * @@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code, goto do_ret; /* skip requested */
/* - * With a real vsyscall, page faults cause SIGSEGV. We want to - * preserve that behavior to make writing exploits harder. + * With a real vsyscall, page faults cause SIGSEGV. */ - prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; - current->thread.sig_on_uaccess_err = 1; - ret = -EFAULT; switch (vsyscall_nr) { case 0: @@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code, break; }
- current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; - check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); - - /* - * If we failed to generate a signal for any reason, - * generate one here. (This should be impossible.) - */ - if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && - !sigismember(&tsk->pending.signal, SIGSEGV))) - goto sigsegv; - - return true; /* Don't emulate the ret. */ + goto sigsegv; }
regs->ax = ret; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bbbf27cfe7015..0702e0c5dbb8d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -519,7 +519,6 @@ struct thread_struct { unsigned long iopl_emul;
unsigned int iopl_warn:1; - unsigned int sig_on_uaccess_err:1;
/* * Protection Keys Register for Userspace. Loaded immediately on diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index abc6fbc3d5f21..31afd82b95245 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -716,39 +716,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, WARN_ON_ONCE(user_mode(regs));
/* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { - /* - * Any interrupt that takes a fault gets the fixup. This makes - * the below recursive fault logic only apply to a faults from - * task context. - */ - if (in_interrupt()) - return; - - /* - * Per the above we're !in_interrupt(), aka. task context. - * - * In this case we need to make sure we're not recursively - * faulting through the emulate_vsyscall() logic. - */ - if (current->thread.sig_on_uaccess_err && signal) { - sanitize_error_code(address, &error_code); - - set_signal_archinfo(address, error_code); - - if (si_code == SEGV_PKUERR) { - force_sig_pkuerr((void __user *)address, pkey); - } else { - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); - } - } - - /* - * Barring that, we can do the fixup and be happy. - */ + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; - }
/* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH
From: Nilay Shroff nilay@linux.ibm.com
[ Upstream commit 863fe60ed27f2c85172654a63c5b827e72c8b2e6 ]
On system where native nvme multipath is configured and iopolicy is set to numa but the nvme controller numa node id is undefined or -1 (NUMA_NO_NODE) then avoid calculating node distance for finding optimal io path. In such case we may access numa distance table with invalid index and that may potentially refer to incorrect memory. So this patch ensures that if the nvme controller numa node id is -1 then instead of calculating node distance for finding optimal io path, we set the numa node distance of such controller to default 10 (LOCAL_DISTANCE).
Link: https://lore.kernel.org/all/20240413090614.678353-1-nilay@linux.ibm.com/ Signed-off-by: Nilay Shroff nilay@linux.ibm.com Reviewed-by: Christoph Hellwig hch@lst.de Reviewed-by: Sagi Grimberg sagi@grimberg.me Reviewed-by: Chaitanya Kulkarni kch@nvidia.com Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/multipath.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 73eddb67f0d24..f8ad43b5f0690 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -190,7 +190,8 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) if (nvme_path_is_disabled(ns)) continue;
- if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) + if (ns->ctrl->numa_node != NUMA_NO_NODE && + READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) distance = node_distance(node, ns->ctrl->numa_node); else distance = LOCAL_DISTANCE;
From: Linus Torvalds torvalds@linux-foundation.org
[ Upstream commit 4efaa5acf0a1d2b5947f98abb3acf8bfd966422b ]
epoll can call out to vfs_poll() with a file pointer that may race with the last 'fput()'. That would make f_count go down to zero, and while the ep->mtx locking means that the resulting file pointer tear-down will be blocked until the poll returns, it means that f_count is already dead, and any use of it won't actually get a reference to the file any more: it's dead regardless.
Make sure we have a valid ref on the file pointer before we call down to vfs_poll() from the epoll routines.
Link: https://lore.kernel.org/lkml/0000000000002d631f0615918f1e@google.com/ Reported-by: syzbot+045b454ab35fd82a35fb@syzkaller.appspotmail.com Reviewed-by: Jens Axboe axboe@kernel.dk Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/eventpoll.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c254094c4c36..b60edddf17870 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -832,6 +832,34 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep return res; }
+/* + * The ffd.file pointer may be in the process of being torn down due to + * being closed, but we may not have finished eventpoll_release() yet. + * + * Normally, even with the atomic_long_inc_not_zero, the file may have + * been free'd and then gotten re-allocated to something else (since + * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). + * + * But for epoll, users hold the ep->mtx mutex, and as such any file in + * the process of being free'd will block in eventpoll_release_file() + * and thus the underlying file allocation will not be free'd, and the + * file re-use cannot happen. + * + * For the same reason we can avoid a rcu_read_lock() around the + * operation - 'ffd.file' cannot go away even if the refcount has + * reached zero (but we must still not call out to ->poll() functions + * etc). + */ +static struct file *epi_fget(const struct epitem *epi) +{ + struct file *file; + + file = epi->ffd.file; + if (!atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + return file; +} + /* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() @@ -840,14 +868,22 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { - struct file *file = epi->ffd.file; + struct file *file = epi_fget(epi); __poll_t res;
+ /* + * We could return EPOLLERR | EPOLLHUP or something, but let's + * treat this more as "file doesn't exist, poll didn't happen". + */ + if (!file) + return 0; + pt->_key = epi->event.events; if (!is_file_epoll(file)) res = vfs_poll(file, pt); else res = __ep_eventpoll_poll(file, pt, depth); + fput(file); return res & epi->event.events; }
linux-stable-mirror@lists.linaro.org