January 2024 - Linux-stable-mirror

[PATCH 5.15.y] bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace)

by kovalev＠altlinux.org

From: Peter Zijlstra <peterz(a)infradead.org> [ Upstream commit c86df29d11dfba27c0a1f5039cd6fe387fbf4239 ] The dispatcher function is currently abusing the ftrace __fentry__ call location for its own purposes -- this obviously gives trouble when the dispatcher and ftrace are both in use. A previous solution tried using __attribute__((patchable_function_entry())) which works, except it is GCC-8+ only, breaking the build on the earlier still supported compilers. Instead use static_call() -- which has its own annotations and does not conflict with ftrace -- to rewrite the dispatch function. By using: return static_call()(ctx, insni, bpf_func) you get a perfect forwarding tail call as function body (iow a single jmp instruction). By having the default static_call() target be bpf_dispatcher_nop_func() it retains the default behaviour (an indirect call to the argument function). Only once a dispatcher program is attached is the target rewritten to directly call the JIT'ed image. Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net> Tested-by: Björn Töpel <bjorn(a)kernel.org> Tested-by: Jiri Olsa <jolsa(a)kernel.org> Acked-by: Björn Töpel <bjorn(a)kernel.org> Acked-by: Jiri Olsa <jolsa(a)kernel.org> Link: https://lkml.kernel.org/r/Y1/oBlK0yFk5c/Im@hirez.programming.kicks-ass.net Link: https://lore.kernel.org/bpf/20221103120647.796772565@infradead.org Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org> --- include/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++++++- kernel/bpf/dispatcher.c | 22 ++++++++-------------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 00c615fc8ec3c3..633b8842e617e8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/percpu-refcount.h> #include <linux/bpfptr.h> +#include <linux/static_call.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -765,6 +766,10 @@ struct bpf_dispatcher { void *image; u32 image_off; struct bpf_ksym ksym; +#ifdef CONFIG_HAVE_STATIC_CALL + struct static_call_key *sc_key; + void *sc_tramp; +#endif }; static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( @@ -782,6 +787,34 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); + +/* + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn + * indirection with a direct call to the bpf program. If the architecture does + * not have STATIC_CALL, avoid a double-indirection. + */ +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __BPF_DISPATCHER_SC_INIT(_name) \ + .sc_key = &STATIC_CALL_KEY(_name), \ + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), + +#define __BPF_DISPATCHER_SC(name) \ + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) + +#define __BPF_DISPATCHER_CALL(name) \ + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) + +#define __BPF_DISPATCHER_UPDATE(_d, _new) \ + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) + +#else +#define __BPF_DISPATCHER_SC_INIT(name) +#define __BPF_DISPATCHER_SC(name) +#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) +#define __BPF_DISPATCHER_UPDATE(_d, _new) +#endif + #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -793,20 +826,23 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ + __BPF_DISPATCHER_SC_INIT(_name##_call) \ } #define DEFINE_BPF_DISPATCHER(name) \ + __BPF_DISPATCHER_SC(name); \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ const struct bpf_insn *)) \ { \ - return bpf_func(ctx, insnsi); \ + return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ BPF_DISPATCHER_INIT(bpf_dispatcher_##name); + #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ @@ -814,6 +850,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); unsigned int (*bpf_func)(const void *, \ const struct bpf_insn *)); \ extern struct bpf_dispatcher bpf_dispatcher_##name; + #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 2444bd15cc2d03..23042cfb5e809b 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,6 +4,7 @@ #include <linux/hash.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/static_call.h> /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -104,17 +105,11 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image) static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) { - void *old, *new; - u32 noff; - int err; - - if (!prev_num_progs) { - old = NULL; - noff = 0; - } else { - old = d->image + d->image_off; + void *new; + u32 noff = 0; + + if (prev_num_progs) noff = d->image_off ^ (PAGE_SIZE / 2); - } new = d->num_progs ? d->image + noff : NULL; if (new) { @@ -122,11 +117,10 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); - if (err || !new) - return; + __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); - d->image_off = noff; + if (new) + d->image_off = noff; } void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, -- 2.33.8

1 year, 10 months

1
1
0 0

[PATCH] media: mxl5xx: Move xpt structures off stack

by Nathan Chancellor

When building for LoongArch with clang 18.0.0, the stack usage of probe() is larger than the allowed 2048 bytes: drivers/media/dvb-frontends/mxl5xx.c:1698:12: warning: stack frame size (2368) exceeds limit (2048) in 'probe' [-Wframe-larger-than] 1698 | static int probe(struct mxl *state, struct mxl5xx_cfg *cfg) | ^ 1 warning generated. This is the result of the linked LLVM commit, which changes how the arrays of structures in config_ts() get handled with CONFIG_INIT_STACK_ZERO and CONFIG_INIT_STACK_PATTERN, which causes the above warning in combination with inlining, as config_ts() gets inlined into probe(). This warning can be easily fixed by moving the array of structures off of the stackvia 'static const', which is a better location for these variables anyways because they are static data that is only ever read from, never modified, so allocating the stack space is wasteful. This drops the stack usage from 2368 bytes to 256 bytes with the same compiler and configuration. Cc: stable(a)vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1977 Link: https://github.com/llvm/llvm-project/commit/afe8b93ffdfef5d8879e1894b9d7dda… Signed-off-by: Nathan Chancellor <nathan(a)kernel.org> --- drivers/media/dvb-frontends/mxl5xx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/media/dvb-frontends/mxl5xx.c b/drivers/media/dvb-frontends/mxl5xx.c index 4ebbcf05cc09..91e9c378397c 100644 --- a/drivers/media/dvb-frontends/mxl5xx.c +++ b/drivers/media/dvb-frontends/mxl5xx.c @@ -1381,57 +1381,57 @@ static int config_ts(struct mxl *state, enum MXL_HYDRA_DEMOD_ID_E demod_id, u32 nco_count_min = 0; u32 clk_type = 0; - struct MXL_REG_FIELD_T xpt_sync_polarity[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_sync_polarity[MXL_HYDRA_DEMOD_MAX] = { {0x90700010, 8, 1}, {0x90700010, 9, 1}, {0x90700010, 10, 1}, {0x90700010, 11, 1}, {0x90700010, 12, 1}, {0x90700010, 13, 1}, {0x90700010, 14, 1}, {0x90700010, 15, 1} }; - struct MXL_REG_FIELD_T xpt_clock_polarity[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_clock_polarity[MXL_HYDRA_DEMOD_MAX] = { {0x90700010, 16, 1}, {0x90700010, 17, 1}, {0x90700010, 18, 1}, {0x90700010, 19, 1}, {0x90700010, 20, 1}, {0x90700010, 21, 1}, {0x90700010, 22, 1}, {0x90700010, 23, 1} }; - struct MXL_REG_FIELD_T xpt_valid_polarity[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_valid_polarity[MXL_HYDRA_DEMOD_MAX] = { {0x90700014, 0, 1}, {0x90700014, 1, 1}, {0x90700014, 2, 1}, {0x90700014, 3, 1}, {0x90700014, 4, 1}, {0x90700014, 5, 1}, {0x90700014, 6, 1}, {0x90700014, 7, 1} }; - struct MXL_REG_FIELD_T xpt_ts_clock_phase[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_ts_clock_phase[MXL_HYDRA_DEMOD_MAX] = { {0x90700018, 0, 3}, {0x90700018, 4, 3}, {0x90700018, 8, 3}, {0x90700018, 12, 3}, {0x90700018, 16, 3}, {0x90700018, 20, 3}, {0x90700018, 24, 3}, {0x90700018, 28, 3} }; - struct MXL_REG_FIELD_T xpt_lsb_first[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_lsb_first[MXL_HYDRA_DEMOD_MAX] = { {0x9070000C, 16, 1}, {0x9070000C, 17, 1}, {0x9070000C, 18, 1}, {0x9070000C, 19, 1}, {0x9070000C, 20, 1}, {0x9070000C, 21, 1}, {0x9070000C, 22, 1}, {0x9070000C, 23, 1} }; - struct MXL_REG_FIELD_T xpt_sync_byte[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_sync_byte[MXL_HYDRA_DEMOD_MAX] = { {0x90700010, 0, 1}, {0x90700010, 1, 1}, {0x90700010, 2, 1}, {0x90700010, 3, 1}, {0x90700010, 4, 1}, {0x90700010, 5, 1}, {0x90700010, 6, 1}, {0x90700010, 7, 1} }; - struct MXL_REG_FIELD_T xpt_enable_output[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_enable_output[MXL_HYDRA_DEMOD_MAX] = { {0x9070000C, 0, 1}, {0x9070000C, 1, 1}, {0x9070000C, 2, 1}, {0x9070000C, 3, 1}, {0x9070000C, 4, 1}, {0x9070000C, 5, 1}, {0x9070000C, 6, 1}, {0x9070000C, 7, 1} }; - struct MXL_REG_FIELD_T xpt_err_replace_sync[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_err_replace_sync[MXL_HYDRA_DEMOD_MAX] = { {0x9070000C, 24, 1}, {0x9070000C, 25, 1}, {0x9070000C, 26, 1}, {0x9070000C, 27, 1}, {0x9070000C, 28, 1}, {0x9070000C, 29, 1}, {0x9070000C, 30, 1}, {0x9070000C, 31, 1} }; - struct MXL_REG_FIELD_T xpt_err_replace_valid[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_err_replace_valid[MXL_HYDRA_DEMOD_MAX] = { {0x90700014, 8, 1}, {0x90700014, 9, 1}, {0x90700014, 10, 1}, {0x90700014, 11, 1}, {0x90700014, 12, 1}, {0x90700014, 13, 1}, {0x90700014, 14, 1}, {0x90700014, 15, 1} }; - struct MXL_REG_FIELD_T xpt_continuous_clock[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_continuous_clock[MXL_HYDRA_DEMOD_MAX] = { {0x907001D4, 0, 1}, {0x907001D4, 1, 1}, {0x907001D4, 2, 1}, {0x907001D4, 3, 1}, {0x907001D4, 4, 1}, {0x907001D4, 5, 1}, {0x907001D4, 6, 1}, {0x907001D4, 7, 1} }; - struct MXL_REG_FIELD_T xpt_nco_clock_rate[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_nco_clock_rate[MXL_HYDRA_DEMOD_MAX] = { {0x90700044, 16, 80}, {0x90700044, 16, 81}, {0x90700044, 16, 82}, {0x90700044, 16, 83}, {0x90700044, 16, 84}, {0x90700044, 16, 85}, --- base-commit: 02d4e62ae2452c83e4a3e279b8e4cb4dcbad4b31 change-id: 20240111-dvb-mxl5xx-move-structs-off-stack-e12172b1ef02 Best regards, -- Nathan Chancellor <nathan(a)kernel.org>

1 year, 10 months

3
3
0 0

[PATCH] Bluetooth: qca: fix device-address endianness

by Johan Hovold

The WCN6855 firmware on the Lenovo ThinkPad X13s expects the Bluetooth device address in MSB order when setting it using the EDL_WRITE_BD_ADDR_OPCODE command. Presumably, this is the case for all non-ROME devices which all use the EDL_WRITE_BD_ADDR_OPCODE command for this (unlike the ROME devices which use a different command and expect the address in LSB order). Reverse the little-endian address before setting it to make sure that the address can be configured using tools like btmgmt or using the 'local-bd-address' devicetree property. Note that this can potentially break systems with boot firmware which has started relying on the broken behaviour and is incorrectly passing the address via devicetree in MSB order. Fixes: 5c0a1001c8be ("Bluetooth: hci_qca: Add helper to set device address") Cc: stable(a)vger.kernel.org # 5.1 Cc: Balakrishna Godavarthi <quic_bgodavar(a)quicinc.com> Cc: Matthias Kaehlcke <mka(a)chromium.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- Hi Qualcomm people, Could you please verify with your documentation that all non-ROME devices expect the address provided in the EDL_WRITE_BD_ADDR_OPCODE command in MSB order? I assume this is not something that anyone would change between firmware revisions, but if that turns out to be the case, we'd need to reverse the address based on firmware revision or similar. Johan drivers/bluetooth/btqca.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index fdb0fae88d1c..29035daf21bc 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -826,11 +826,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) { + bdaddr_t bdaddr_swapped; struct sk_buff *skb; int err; - skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, - HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + baswap(&bdaddr_swapped, bdaddr); + + skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, + &bdaddr_swapped, HCI_EV_VENDOR, + HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); -- 2.41.0

1 year, 10 months

6
15
0 0

FAILED: patch "[PATCH] clocksource: Skip watchdog check for large watchdog intervals" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 644649553508b9bacf0fc7a5bdc4f9e0165576a5 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012905-carry-revolt-b8d5@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: 644649553508 ("clocksource: Skip watchdog check for large watchdog intervals") c37e85c135ce ("clocksource: Loosen clocksource watchdog constraints") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 644649553508b9bacf0fc7a5bdc4f9e0165576a5 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner <jwiesner(a)suse.de> Date: Mon, 22 Jan 2024 18:23:50 +0100 Subject: [PATCH] clocksource: Skip watchdog check for large watchdog intervals There have been reports of the watchdog marking clocksources unstable on machines with 8 NUMA nodes: clocksource: timekeeping watchdog on CPU373: Marking clocksource 'tsc' as unstable because the skew is too large: clocksource: 'hpet' wd_nsec: 14523447520 clocksource: 'tsc' cs_nsec: 14524115132 The measured clocksource skew - the absolute difference between cs_nsec and wd_nsec - was 668 microseconds: cs_nsec - wd_nsec = 14524115132 - 14523447520 = 667612 The kernel used 200 microseconds for the uncertainty_margin of both the clocksource and watchdog, resulting in a threshold of 400 microseconds (the md variable). Both the cs_nsec and the wd_nsec value indicate that the readout interval was circa 14.5 seconds. The observed behaviour is that watchdog checks failed for large readout intervals on 8 NUMA node machines. This indicates that the size of the skew was directly proportinal to the length of the readout interval on those machines. The measured clocksource skew, 668 microseconds, was evaluated against a threshold (the md variable) that is suited for readout intervals of roughly WATCHDOG_INTERVAL, i.e. HZ >> 1, which is 0.5 second. The intention of 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") was to tighten the threshold for evaluating skew and set the lower bound for the uncertainty_margin of clocksources to twice WATCHDOG_MAX_SKEW. Later in c37e85c135ce ("clocksource: Loosen clocksource watchdog constraints"), the WATCHDOG_MAX_SKEW constant was increased to 125 microseconds to fit the limit of NTP, which is able to use a clocksource that suffers from up to 500 microseconds of skew per second. Both the TSC and the HPET use default uncertainty_margin. When the readout interval gets stretched the default uncertainty_margin is no longer a suitable lower bound for evaluating skew - it imposes a limit that is far stricter than the skew with which NTP can deal. The root causes of the skew being directly proportinal to the length of the readout interval are: * the inaccuracy of the shift/mult pairs of clocksources and the watchdog * the conversion to nanoseconds is imprecise for large readout intervals Prevent this by skipping the current watchdog check if the readout interval exceeds 2 * WATCHDOG_INTERVAL. Considering the maximum readout interval of 2 * WATCHDOG_INTERVAL, the current default uncertainty margin (of the TSC and HPET) corresponds to a limit on clocksource skew of 250 ppm (microseconds of skew per second). To keep the limit imposed by NTP (500 microseconds of skew per second) for all possible readout intervals, the margins would have to be scaled so that the threshold value is proportional to the length of the actual readout interval. As for why the readout interval may get stretched: Since the watchdog is executed in softirq context the expiration of the watchdog timer can get severely delayed on account of a ksoftirqd thread not getting to run in a timely manner. Surely, a system with such belated softirq execution is not working well and the scheduling issue should be looked into but the clocksource watchdog should be able to deal with it accordingly. Fixes: 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") Suggested-by: Feng Tang <feng.tang(a)intel.com> Signed-off-by: Jiri Wiesner <jwiesner(a)suse.de> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Tested-by: Paul E. McKenney <paulmck(a)kernel.org> Reviewed-by: Feng Tang <feng.tang(a)intel.com> Cc: stable(a)vger.kernel.org Link: https://lore.kernel.org/r/20240122172350.GA740@incl diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c108ed8a9804..3052b1f1168e 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -99,6 +99,7 @@ static u64 suspend_start; * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) +#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ)) /* * Threshold: 0.0312s, when doubled: 0.0625s. @@ -134,6 +135,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; +static int64_t watchdog_max_interval; static inline void clocksource_watchdog_lock(unsigned long *flags) { @@ -399,8 +401,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; + int64_t wd_nsec, cs_nsec, interval; int next_cpu, reset_pending; - int64_t wd_nsec, cs_nsec; struct clocksource *cs; enum wd_read_status read_ret; unsigned long extra_wait = 0; @@ -470,6 +472,27 @@ static void clocksource_watchdog(struct timer_list *unused) if (atomic_read(&watchdog_reset_pending)) continue; + /* + * The processing of timer softirqs can get delayed (usually + * on account of ksoftirqd not getting to run in a timely + * manner), which causes the watchdog interval to stretch. + * Skew detection may fail for longer watchdog intervals + * on account of fixed margins being used. + * Some clocksources, e.g. acpi_pm, cannot tolerate + * watchdog intervals longer than a few seconds. + */ + interval = max(cs_nsec, wd_nsec); + if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) { + if (system_state > SYSTEM_SCHEDULING && + interval > 2 * watchdog_max_interval) { + watchdog_max_interval = interval; + pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n", + cs_nsec, wd_nsec); + } + watchdog_timer.expires = jiffies; + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) {

1 year, 10 months

3
2
0 0

[PATCH 6.1.y 0/7] ASoC: codecs: es8326: fix support

by kovalev＠altlinux.org

These patches were backported from v6.6 upstream and are intended for 6.1.y stable kernel. Patches have been successfully tested on the latest 6.1.75 kernel. [PATCH 6.1.y 1/7] ASoC: codecs: es8326: Convert to i2c's .probe_new() [PATCH 6.1.y 2/7] ASoC: codecs: ES8326: Add es8326_mute function [PATCH 6.1.y 3/7] ASoC: codecs: ES8326: Change Hp_detect register names [PATCH 6.1.y 4/7] ASoC: codecs: ES8326: Change Volatile Reg function [PATCH 6.1.y 5/7] ASoC: codecs: ES8326: Fix power-up sequence [PATCH 6.1.y 6/7] ASOC: codecs: ES8326: Add calibration support for [PATCH 6.1.y 7/7] ASoC: codecs: ES8326: Update jact detection function

1 year, 10 months

3
10
0 0

[PATCH 5.10/5.15 v2 0/1 RFC] mm/truncate: fix WARNING in ext4_set_page_dirty()

by Roman Smirnov

Syzkaller reports warning in ext4_set_page_dirty() in 5.10 and 5.15 stable releases. It happens because invalidate_inode_page() frees pages that are needed for the system. To fix this we need to add additional checks to the function. page_mapped() checks if a page exists in the page tables, but this is not enough. The page can be used in other places: https://elixir.bootlin.com/linux/v6.8-rc1/source/include/linux/page_ref.h#L… Kernel outputs an error line related to direct I/O: https://syzkaller.appspot.com/text?tag=CrashLog&x=14ab52dac80000 The problem can be fixed in 5.10 and 5.15 stable releases by the following patch. The patch replaces page_mapped() call with check that finds additional references to the page excluding page cache and filesystem private data. If additional references exist, the page cannot be freed. This version does not include the first patch from the first version. The problem can be fixed without it. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Link: https://syzkaller.appspot.com/bug?extid=02f21431b65c214aa1d6 Matthew Wilcox (Oracle) (1): mm/truncate: Replace page_mapped() call in invalidate_inode_page() mm/truncate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- 2.34.1

1 year, 10 months

4
7
0 0

[PATCH] xen/events: close evtchn after mapping cleanup

by Maximilian Heyne

shutdown_pirq and startup_pirq are not taking the irq_mapping_update_lock because they can't due to lock inversion. Both are called with the irq_desc->lock being taking. The lock order, however, is first irq_mapping_update_lock and then irq_desc->lock. This opens multiple races: - shutdown_pirq can be interrupted by a function that allocates an event channel: CPU0 CPU1 shutdown_pirq { xen_evtchn_close(e) __startup_pirq { EVTCHNOP_bind_pirq -> returns just freed evtchn e set_evtchn_to_irq(e, irq) } xen_irq_info_cleanup() { set_evtchn_to_irq(e, -1) } } Assume here event channel e refers here to the same event channel number. After this race the evtchn_to_irq mapping for e is invalid (-1). - __startup_pirq races with __unbind_from_irq in a similar way. Because __startup_pirq doesn't take irq_mapping_update_lock it can grab the evtchn that __unbind_from_irq is currently freeing and cleaning up. In this case even though the event channel is allocated, its mapping can be unset in evtchn_to_irq. The fix is to first cleanup the mappings and then close the event channel. In this way, when an event channel gets allocated it's potential previous evtchn_to_irq mappings are guaranteed to be unset already. This is also the reverse order of the allocation where first the event channel is allocated and then the mappings are setup. On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal [un]bind interfaces"), we hit a BUG like the following during probing of NVMe devices. The issue is that during nvme_setup_io_queues, pci_free_irq is called for every device which results in a call to shutdown_pirq. With many nvme devices it's therefore likely to hit this race during boot because there will be multiple calls to shutdown_pirq and startup_pirq are running potentially in parallel. ------------[ cut here ]------------ blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled kernel BUG at drivers/xen/events/events_base.c:499! invalid opcode: 0000 [#1] SMP PTI CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1 Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006 Workqueue: nvme-reset-wq nvme_reset_work RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0 Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_trace_log_lvl+0x1c1/0x2d9 ? show_trace_log_lvl+0x1c1/0x2d9 ? set_affinity_irq+0xdc/0x1c0 ? __die_body.cold+0x8/0xd ? die+0x2b/0x50 ? do_trap+0x90/0x110 ? bind_evtchn_to_cpu+0xdf/0xf0 ? do_error_trap+0x65/0x80 ? bind_evtchn_to_cpu+0xdf/0xf0 ? exc_invalid_op+0x4e/0x70 ? bind_evtchn_to_cpu+0xdf/0xf0 ? asm_exc_invalid_op+0x12/0x20 ? bind_evtchn_to_cpu+0xdf/0xf0 ? bind_evtchn_to_cpu+0xc5/0xf0 set_affinity_irq+0xdc/0x1c0 irq_do_set_affinity+0x1d7/0x1f0 irq_setup_affinity+0xd6/0x1a0 irq_startup+0x8a/0xf0 __setup_irq+0x639/0x6d0 ? nvme_suspend+0x150/0x150 request_threaded_irq+0x10c/0x180 ? nvme_suspend+0x150/0x150 pci_request_irq+0xa8/0xf0 ? __blk_mq_free_request+0x74/0xa0 queue_request_irq+0x6f/0x80 nvme_create_queue+0x1af/0x200 nvme_create_io_queues+0xbd/0xf0 nvme_setup_io_queues+0x246/0x320 ? nvme_irq_check+0x30/0x30 nvme_reset_work+0x1c8/0x400 process_one_work+0x1b0/0x350 worker_thread+0x49/0x310 ? process_one_work+0x350/0x350 kthread+0x11b/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Modules linked in: ---[ end trace a11715de1eee1873 ]--- Fixes: d46a78b05c0e ("xen: implement pirq type event channels") Cc: stable(a)vger.kernel.org Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com> Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de> --- drivers/xen/events/events_base.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b8cfea7812d6..3b9f080109d7 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -923,8 +923,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -956,6 +956,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(struct irq_info *info, unsigned int irq) { evtchn_port_t evtchn; + bool close_evtchn = false; if (!info) { xen_irq_free_desc(irq); @@ -975,7 +976,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq) struct xenbus_device *dev; if (!info->is_static) - xen_evtchn_close(evtchn); + close_evtchn = true; switch (info->type) { case IRQT_VIRQ: @@ -995,6 +996,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq) } xen_irq_info_cleanup(info); + + if (close_evtchn) + xen_evtchn_close(evtchn); } xen_free_irq(info); -- 2.40.1 Amazon Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B Sitz: Berlin Ust-ID: DE 289 237 879

1 year, 10 months

2
2
0 0

[PATCH v2] Revert "usb: typec: tcpm: fix cc role at port reset"

by Badhri Jagan Sridharan

This reverts commit 1e35f074399dece73d5df11847d4a0d7a6f49434. Given that ERROR_RECOVERY calls into PORT_RESET for Hi-Zing the CC pins, setting CC pins to default state during PORT_RESET breaks error recovery. 4.5.2.2.2.1 ErrorRecovery State Requirements The port shall not drive VBUS or VCONN, and shall present a high-impedance to ground (above zOPEN) on its CC1 and CC2 pins. Hi-Zing the CC pins is the inteded behavior for PORT_RESET. CC pins are set to default state after tErrorRecovery in PORT_RESET_WAIT_OFF. 4.5.2.2.2.2 Exiting From ErrorRecovery State A Sink shall transition to Unattached.SNK after tErrorRecovery. A Source shall transition to Unattached.SRC after tErrorRecovery. Cc: stable(a)vger.kernel.org Cc: Frank Wang <frank.wang(a)rock-chips.com> Fixes: 1e35f074399d ("usb: typec: tcpm: fix cc role at port reset") Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com> --- drivers/usb/typec/tcpm/tcpm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 5945e3a2b0f7..9d410718eaf4 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4876,8 +4876,7 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ? - TYPEC_CC_RD : tcpm_rp_cc(port)); + tcpm_set_cc(port, TYPEC_CC_OPEN); tcpm_set_state(port, PORT_RESET_WAIT_OFF, PD_T_ERROR_RECOVERY); break; base-commit: 933bb7b878ddd0f8c094db45551a7daddf806e00 -- 2.43.0.429.g432eaa2c6b-goog

1 year, 10 months

5
4
0 0

[PATCH v2] arm64/signal: Don't assume that TIF_SVE means we saved SVE state

by Mark Brown

When we are in a syscall we will only save the FPSIMD subset even though the task still has access to the full register set, and on context switch we will only remove TIF_SVE when loading the register state. This means that the signal handling code should not assume that TIF_SVE means that the register state is stored in SVE format, it should instead check the format that was recorded during save. Fixes: 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Signed-off-by: Mark Brown <broonie(a)kernel.org> Cc: stable(a)vger.kernel.org --- Changes in v2: - Rebase onto v6.8-rc2. - Link to v1: https://lore.kernel.org/r/20240119-arm64-sve-signal-regs-v1-1-b9fd61b0289a@… --- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/kernel/signal.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a5dc6f764195..25ceaee6b025 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1635,7 +1635,7 @@ void fpsimd_preserve_current_state(void) void fpsimd_signal_preserve_current_state(void) { fpsimd_preserve_current_state(); - if (test_thread_flag(TIF_SVE)) + if (current->thread.fp_type == FP_STATE_SVE) sve_to_fpsimd(current); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0e8beb3349ea..425b1bc17a3f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -242,7 +242,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) vl = task_get_sme_vl(current); vq = sve_vq_from_vl(vl); flags |= SVE_SIG_FLAG_SM; - } else if (test_thread_flag(TIF_SVE)) { + } else if (current->thread.fp_type == FP_STATE_SVE) { vq = sve_vq_from_vl(vl); } @@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; - if (add_all || test_thread_flag(TIF_SVE) || + if (add_all || current->thread.fp_type == FP_STATE_SVE || thread_sm_enabled(&current->thread)) { int vl = max(sve_max_vl(), sme_max_vl()); --- base-commit: 41bccc98fb7931d63d03f326a746ac4d429c1dd3 change-id: 20240118-arm64-sve-signal-regs-5711e0d10425 Best regards, -- Mark Brown <broonie(a)kernel.org>

1 year, 10 months

2
1
0 0

[PATCH v2] clocksource: timer-riscv: Clear timer interrupt on timer initialization

by Ley Foon Tan

In the RISC-V specification, the stimecmp register doesn't have a default value. To prevent the timer interrupt from being triggered during timer initialization, clear the timer interrupt by writing stimecmp with a maximum value. Fixes: 9f7a8ff6391f ("RISC-V: Prefer sstc extension if available") Cc: <stable(a)vger.kernel.org> Signed-off-by: Ley Foon Tan <leyfoon.tan(a)starfivetech.com> --- v2: Resolved comments from Anup. - Moved riscv_clock_event_stop() to riscv_timer_starting_cpu(). - Added Fixes tag --- drivers/clocksource/timer-riscv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index e66dcbd66566..672669eb7281 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -116,6 +116,9 @@ static int riscv_timer_starting_cpu(unsigned int cpu) ce->rating = 450; clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff); + /* Clear timer interrupt */ + riscv_clock_event_stop(); + enable_percpu_irq(riscv_clock_event_irq, irq_get_trigger_type(riscv_clock_event_irq)); return 0; -- 2.43.0

1 year, 10 months

3
2
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror January 2024