From: Hou Tao houtao1@huawei.com
[ Upstream commit 87e9675a0dfd0bf4a36550e4a0e673038ec67aee ]
When security_bpf_map_create() in map_create() fails, map_create() will call btf_put() and ->map_free() callback to free the map. It doesn't free the btf_record of map value, so add the missed btf_record_free() when map creation fails.
However btf_record_free() needs to be called after ->map_free() just like bpf_map_free_deferred() did, because ->map_free() may use the btf_record to free the special fields in preallocated map value. So factor out bpf_map_free() helper to free the map, btf_record, and btf orderly and use the helper in both map_create() and bpf_map_free_deferred().
Signed-off-by: Hou Tao houtao1@huawei.com Acked-by: Jiri Olsa jolsa@kernel.org Link: https://lore.kernel.org/r/20240912012845.3458483-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/syscall.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index bf6c5f685ea22..931ae9ad78149 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -733,15 +733,11 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) } }
-/* called from workqueue */ -static void bpf_map_free_deferred(struct work_struct *work) +static void bpf_map_free(struct bpf_map *map) { - struct bpf_map *map = container_of(work, struct bpf_map, work); struct btf_record *rec = map->record; struct btf *btf = map->btf;
- security_bpf_map_free(map); - bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); /* Delay freeing of btf_record for maps, as map_free @@ -760,6 +756,16 @@ static void bpf_map_free_deferred(struct work_struct *work) btf_put(btf); }
+/* called from workqueue */ +static void bpf_map_free_deferred(struct work_struct *work) +{ + struct bpf_map *map = container_of(work, struct bpf_map, work); + + security_bpf_map_free(map); + bpf_map_release_memcg(map); + bpf_map_free(map); +} + static void bpf_map_put_uref(struct bpf_map *map) { if (atomic64_dec_and_test(&map->usercnt)) { @@ -1411,8 +1417,7 @@ static int map_create(union bpf_attr *attr) free_map_sec: security_bpf_map_free(map); free_map: - btf_put(map->btf); - map->ops->map_free(map); + bpf_map_free(map); put_token: bpf_token_put(token); return err;
From: Daniel Borkmann daniel@iogearbox.net
[ Upstream commit b8e188f023e07a733b47d5865311ade51878fe40 ]
The assumption of 'in privileged mode reads from uninitialized stack locations are permitted' is not quite correct since the verifier was probing for read access rather than write access. Both tests need to be annotated as __success for privileged and unprivileged.
Signed-off-by: Daniel Borkmann daniel@iogearbox.net Acked-by: Andrii Nakryiko andrii@kernel.org Link: https://lore.kernel.org/r/20240913191754.13290-6-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/bpf/progs/verifier_int_ptr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index 9fc3fae5cd833..87206803c0255 100644 --- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -8,7 +8,6 @@ SEC("socket") __description("ARG_PTR_TO_LONG uninitialized") __success -__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8") __naked void arg_ptr_to_long_uninitialized(void) { asm volatile (" \ @@ -36,9 +35,7 @@ __naked void arg_ptr_to_long_uninitialized(void)
SEC("socket") __description("ARG_PTR_TO_LONG half-uninitialized") -/* in privileged mode reads from uninitialized stack locations are permitted */ -__success __failure_unpriv -__msg_unpriv("invalid indirect read from stack R4 off -16+4 size 8") +__success __retval(0) __naked void ptr_to_long_half_uninitialized(void) {
From: Yonghong Song yonghong.song@linux.dev
[ Upstream commit 7dd34d7b7dcf9309fc6224caf4dd5b35bedddcb7 ]
Zac Ecob reported a problem where a bpf program may cause kernel crash due to the following error: Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI
The failure is due to the below signed divide: LLONG_MIN/-1 where LLONG_MIN equals to -9,223,372,036,854,775,808. LLONG_MIN/-1 is supposed to give a positive number 9,223,372,036,854,775,808, but it is impossible since for 64-bit system, the maximum positive number is 9,223,372,036,854,775,807. On x86_64, LLONG_MIN/-1 will cause a kernel exception. On arm64, the result for LLONG_MIN/-1 is LLONG_MIN.
Further investigation found all the following sdiv/smod cases may trigger an exception when bpf program is running on x86_64 platform: - LLONG_MIN/-1 for 64bit operation - INT_MIN/-1 for 32bit operation - LLONG_MIN%-1 for 64bit operation - INT_MIN%-1 for 32bit operation where -1 can be an immediate or in a register.
On arm64, there are no exceptions: - LLONG_MIN/-1 = LLONG_MIN - INT_MIN/-1 = INT_MIN - LLONG_MIN%-1 = 0 - INT_MIN%-1 = 0 where -1 can be an immediate or in a register.
Insn patching is needed to handle the above cases and the patched codes produced results aligned with above arm64 result. The below are pseudo codes to handle sdiv/smod exceptions including both divisor -1 and divisor 0 and the divisor is stored in a register.
sdiv: tmp = rX tmp += 1 /* [-1, 0] -> [0, 1] if tmp >(unsigned) 1 goto L2 if tmp == 0 goto L1 rY = 0 L1: rY = -rY; goto L3 L2: rY /= rX L3:
smod: tmp = rX tmp += 1 /* [-1, 0] -> [0, 1] if tmp >(unsigned) 1 goto L1 if tmp == 1 (is64 ? goto L2 : goto L3) rY = 0; goto L2 L1: rY %= rX L2: goto L4 // only when !is64 L3: wY = wY // only when !is64 L4:
[1] https://lore.kernel.org/bpf/tPJLTEh7S_DxFEqAI2Ji5MBSoZVg7_G-Py2iaZpAaWtM961f...
Reported-by: Zac Ecob zacecob@protonmail.com Signed-off-by: Yonghong Song yonghong.song@linux.dev Acked-by: Andrii Nakryiko andrii@kernel.org Link: https://lore.kernel.org/r/20240913150326.1187788-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/verifier.c | 93 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d8520095ca030..f3e6b0c17a8b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19911,13 +19911,46 @@ static int do_misc_fixups(struct bpf_verifier_env *env) /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */ insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
- /* Make divide-by-zero exceptions impossible. */ + /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ + if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || + insn->code == (BPF_ALU | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && + insn->off == 1 && insn->imm == -1) { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + bool isdiv = BPF_OP(insn->code) == BPF_DIV; + struct bpf_insn *patchlet; + struct bpf_insn chk_and_sdiv[] = { + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + }; + struct bpf_insn chk_and_smod[] = { + BPF_MOV32_IMM(insn->dst_reg, 0), + }; + + patchlet = isdiv ? chk_and_sdiv : chk_and_smod; + cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod); + + new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + + /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; bool isdiv = BPF_OP(insn->code) == BPF_DIV; + bool is_sdiv = isdiv && insn->off == 1; + bool is_smod = !isdiv && insn->off == 1; struct bpf_insn *patchlet; struct bpf_insn chk_and_div[] = { /* [R,W]x div 0 -> 0 */ @@ -19937,10 +19970,62 @@ static int do_misc_fixups(struct bpf_verifier_env *env) BPF_JMP_IMM(BPF_JA, 0, 0, 1), BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; + struct bpf_insn chk_and_sdiv[] = { + /* [R,W]x sdiv 0 -> 0 + * LLONG_MIN sdiv -1 -> LLONG_MIN + * INT_MIN sdiv -1 -> INT_MIN + */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 4, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 1, 0), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_MOV | BPF_K, insn->dst_reg, + 0, 0, 0), + /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + }; + struct bpf_insn chk_and_smod[] = { + /* [R,W]x mod 0 -> [R,W]x */ + /* [R,W]x mod -1 -> 0 */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 3, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 3 + (is64 ? 0 : 1), 1), + BPF_MOV32_IMM(insn->dst_reg, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), + };
- patchlet = isdiv ? chk_and_div : chk_and_mod; - cnt = isdiv ? ARRAY_SIZE(chk_and_div) : - ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + if (is_sdiv) { + patchlet = chk_and_sdiv; + cnt = ARRAY_SIZE(chk_and_sdiv); + } else if (is_smod) { + patchlet = chk_and_smod; + cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0); + } else { + patchlet = isdiv ? chk_and_div : chk_and_mod; + cnt = isdiv ? ARRAY_SIZE(chk_and_div) : + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + }
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog)
From: Tao Chen chen.dylane@gmail.com
[ Upstream commit 1d244784be6b01162b732a5a7d637dfc024c3203 ]
Percpu map is often used, but the map value size limit often ignored, like issue: https://github.com/iovisor/bcc/issues/2519. Actually, percpu map value size is bound by PCPU_MIN_UNIT_SIZE, so we can check the value size whether it exceeds PCPU_MIN_UNIT_SIZE first, like percpu map of local_storage. Maybe the error message seems clearer compared with "cannot allocate memory".
Signed-off-by: Jinke Han jinkehan@didiglobal.com Signed-off-by: Tao Chen chen.dylane@gmail.com Signed-off-by: Andrii Nakryiko andrii@kernel.org Acked-by: Jiri Olsa jolsa@kernel.org Acked-by: Andrii Nakryiko andrii@kernel.org Link: https://lore.kernel.org/bpf/20240910144111.1464912-2-chen.dylane@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/arraymap.c | 3 +++ kernel/bpf/hashtab.c | 3 +++ 2 files changed, 6 insertions(+)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index feabc01938525..a5c6f8aa49015 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -73,6 +73,9 @@ int array_map_alloc_check(union bpf_attr *attr) /* avoid overflow on round_up(map->value_size) */ if (attr->value_size > INT_MAX) return -E2BIG; + /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */ + if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE) + return -E2BIG;
return 0; } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 06115f8728e89..bcb74ac880cb5 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -462,6 +462,9 @@ static int htab_map_alloc_check(union bpf_attr *attr) * kmalloc-able later in htab_map_update_elem() */ return -E2BIG; + /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */ + if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE) + return -E2BIG;
return 0; }
From: Kuan-Wei Chiu visitorckw@gmail.com
[ Upstream commit f04e2ad394e2755d0bb2d858ecb5598718bf00d5 ]
When netfilter has no entry to display, qsort is called with qsort(NULL, 0, ...). This results in undefined behavior, as UBSan reports:
net.c:827:2: runtime error: null pointer passed as argument 1, which is declared to never be null
Although the C standard does not explicitly state whether calling qsort with a NULL pointer when the size is 0 constitutes undefined behavior, Section 7.1.4 of the C standard (Use of library functions) mentions:
"Each of the following statements applies unless explicitly stated otherwise in the detailed descriptions that follow: If an argument to a function has an invalid value (such as a value outside the domain of the function, or a pointer outside the address space of the program, or a null pointer, or a pointer to non-modifiable storage when the corresponding parameter is not const-qualified) or a type (after promotion) not expected by a function with variable number of arguments, the behavior is undefined."
To avoid this, add an early return when nf_link_info is NULL to prevent calling qsort with a NULL pointer.
Signed-off-by: Kuan-Wei Chiu visitorckw@gmail.com Signed-off-by: Andrii Nakryiko andrii@kernel.org Reviewed-by: Quentin Monnet qmo@kernel.org Link: https://lore.kernel.org/bpf/20240910150207.3179306-1-visitorckw@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/bpf/bpftool/net.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 968714b4c3d45..0ad684e810f34 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -824,6 +824,9 @@ static void show_link_netfilter(void) nf_link_count++; }
+ if (!nf_link_info) + return; + qsort(nf_link_info, nf_link_count, sizeof(*nf_link_info), netfilter_link_compar);
for (id = 0; id < nf_link_count; id++) {
From: Kuan-Wei Chiu visitorckw@gmail.com
[ Upstream commit 4cdc0e4ce5e893bc92255f5f734d983012f2bc2e ]
Replace shifts of '1' with '1U' in bitwise operations within __show_dev_tc_bpf() to prevent undefined behavior caused by shifting into the sign bit of a signed integer. By using '1U', the operations are explicitly performed on unsigned integers, avoiding potential integer overflow or sign-related issues.
Signed-off-by: Kuan-Wei Chiu visitorckw@gmail.com Signed-off-by: Andrii Nakryiko andrii@kernel.org Acked-by: Quentin Monnet qmo@kernel.org Link: https://lore.kernel.org/bpf/20240908140009.3149781-1-visitorckw@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/bpf/bpftool/net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 0ad684e810f34..0f2106218e1f0 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -482,9 +482,9 @@ static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev, if (prog_flags[i] || json_output) { NET_START_ARRAY("prog_flags", "%s "); for (j = 0; prog_flags[i] && j < 32; j++) { - if (!(prog_flags[i] & (1 << j))) + if (!(prog_flags[i] & (1U << j))) continue; - NET_DUMP_UINT_ONLY(1 << j); + NET_DUMP_UINT_ONLY(1U << j); } NET_END_ARRAY(""); } @@ -493,9 +493,9 @@ static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev, if (link_flags[i] || json_output) { NET_START_ARRAY("link_flags", "%s "); for (j = 0; link_flags[i] && j < 32; j++) { - if (!(link_flags[i] & (1 << j))) + if (!(link_flags[i] & (1U << j))) continue; - NET_DUMP_UINT_ONLY(1 << j); + NET_DUMP_UINT_ONLY(1U << j); } NET_END_ARRAY(""); }
From: Heiko Carstens hca@linux.ibm.com
[ Upstream commit fccb175bc89a0d37e3ff513bb6bf1f73b3a48950 ]
Only a couple of files of the decompressor are compiled with the minimum architecture level. This is problematic for potential function calls between compile units, especially if a target function is within a compile until compiled for a higher architecture level, since that may lead to an unexpected operation exception.
Therefore compile all files of the decompressor for the same (minimum) architecture level.
Reviewed-by: Sven Schnelle svens@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/boot/Makefile | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 4f476884d3404..f1b2989cc7cca 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -11,11 +11,8 @@ KASAN_SANITIZE := n KCSAN_SANITIZE := n KMSAN_SANITIZE := n
-KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) -KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) - # -# Use minimum architecture for als.c to be able to print an error +# Use minimum architecture level so it is possible to print an error # message if the kernel is started on a machine which is too old # ifndef CONFIG_CC_IS_CLANG @@ -24,16 +21,10 @@ else CC_FLAGS_MARCH_MINIMUM := -march=z10 endif
-ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM)) -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM) -AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) -AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM) -CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM) -endif +KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR)) +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR)) +KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) +KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
From: Heiko Carstens hca@linux.ibm.com
[ Upstream commit 0147addc4fb72a39448b8873d8acdf3a0f29aa65 ]
Disable compile time optimizations of test_facility() for the decompressor. The decompressor should not contain any optimized code depending on the architecture level set the kernel image is compiled for to avoid unexpected operation exceptions.
Add a __DECOMPRESSOR check to test_facility() to enforce that facilities are always checked during runtime for the decompressor.
Reviewed-by: Sven Schnelle svens@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/include/asm/facility.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index b7d234838a366..65ebf86506cde 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -59,8 +59,10 @@ static inline int test_facility(unsigned long nr) unsigned long facilities_als[] = { FACILITIES_ALS };
if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) { - if (__test_facility(nr, &facilities_als)) - return 1; + if (__test_facility(nr, &facilities_als)) { + if (!__is_defined(__DECOMPRESSOR)) + return 1; + } } return __test_facility(nr, &stfle_fac_list); }
From: Gerald Schaefer gerald.schaefer@linux.ibm.com
[ Upstream commit 131b8db78558120f58c5dc745ea9655f6b854162 ]
Adding/removing large amount of pages at once to/from the CMM balloon can result in rcu_sched stalls or workqueue lockups, because of busy looping w/o cond_resched().
Prevent this by adding a cond_resched(). cmm_free_pages() holds a spin_lock while looping, so it cannot be added directly to the existing loop. Instead, introduce a wrapper function that operates on maximum 256 pages at once, and add it there.
Signed-off-by: Gerald Schaefer gerald.schaefer@linux.ibm.com Reviewed-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/mm/cmm.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 75d15bf41d97c..d01724a715d0f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -95,11 +95,12 @@ static long cmm_alloc_pages(long nr, long *counter, (*counter)++; spin_unlock(&cmm_lock); nr--; + cond_resched(); } return nr; }
-static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) +static long __cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) { struct cmm_page_array *pa; unsigned long addr; @@ -123,6 +124,21 @@ static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) return nr; }
+static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) +{ + long inc = 0; + + while (nr) { + inc = min(256L, nr); + nr -= inc; + inc = __cmm_free_pages(inc, counter, list); + if (inc) + break; + cond_resched(); + } + return nr + inc; +} + static int cmm_oom_notify(struct notifier_block *self, unsigned long dummy, void *parm) {
From: Yonghong Song yonghong.song@linux.dev
[ Upstream commit c8831bdbfbab672c006a18006d36932a494b2fd6 ]
Daniel Hodges reported a jit error when playing with a sched-ext program. The error message is: unexpected jmp_cond padding: -4 bytes
But further investigation shows the error is actual due to failed convergence. The following are some analysis:
... pass4, final_proglen=4391: ... 20e: 48 85 ff test rdi,rdi 211: 74 7d je 0x290 213: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] ... 289: 48 85 ff test rdi,rdi 28c: 74 17 je 0x2a5 28e: e9 7f ff ff ff jmp 0x212 293: bf 03 00 00 00 mov edi,0x3
Note that insn at 0x211 is 2-byte cond jump insn for offset 0x7d (-125) and insn at 0x28e is 5-byte jmp insn with offset -129.
pass5, final_proglen=4392: ... 20e: 48 85 ff test rdi,rdi 211: 0f 84 80 00 00 00 je 0x297 217: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] ... 28d: 48 85 ff test rdi,rdi 290: 74 1a je 0x2ac 292: eb 84 jmp 0x218 294: bf 03 00 00 00 mov edi,0x3
Note that insn at 0x211 is 6-byte cond jump insn now since its offset becomes 0x80 based on previous round (0x293 - 0x213 = 0x80). At the same time, insn at 0x292 is a 2-byte insn since its offset is -124.
pass6 will repeat the same code as in pass4. pass7 will repeat the same code as in pass5, and so on. This will prevent eventual convergence.
Passes 1-14 are with padding = 0. At pass15, padding is 1 and related insn looks like:
211: 0f 84 80 00 00 00 je 0x297 217: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] ... 24d: 48 85 d2 test rdx,rdx
The similar code in pass14: 211: 74 7d je 0x290 213: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] ... 249: 48 85 d2 test rdx,rdx 24c: 74 21 je 0x26f 24e: 48 01 f7 add rdi,rsi ...
Before generating the following insn, 250: 74 21 je 0x273 "padding = 1" enables some checking to ensure nops is either 0 or 4 where #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) nops = INSN_SZ_DIFF - 2
In this specific case, addrs[i] = 0x24e // from pass14 addrs[i-1] = 0x24d // from pass15 prog - temp = 3 // from 'test rdx,rdx' in pass15 so nops = -4 and this triggers the failure.
To fix the issue, we need to break cycles of je <-> jmp. For example, in the above case, we have 211: 74 7d je 0x290 the offset is 0x7d. If 2-byte je insn is generated only if the offset is less than 0x7d (<= 0x7c), the cycle can be break and we can achieve the convergence.
I did some study on other cases like je <-> je, jmp <-> je and jmp <-> jmp which may cause cycles. Those cases are not from actual reproducible cases since it is pretty hard to construct a test case for them. the results show that the offset <= 0x7b (0x7b = 123) should be enough to cover all cases. This patch added a new helper to generate 8-bit cond/uncond jmp insns only if the offset range is [-128, 123].
Reported-by: Daniel Hodges hodgesd@meta.com Signed-off-by: Yonghong Song yonghong.song@linux.dev Link: https://lore.kernel.org/r/20240904221251.37109-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/net/bpf_jit_comp.c | 54 +++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d25d81c8ecc00..f609dc379be75 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -64,6 +64,56 @@ static bool is_imm8(int value) return value <= 127 && value >= -128; }
+/* + * Let us limit the positive offset to be <= 123. + * This is to ensure eventual jit convergence For the following patterns: + * ... + * pass4, final_proglen=4391: + * ... + * 20e: 48 85 ff test rdi,rdi + * 211: 74 7d je 0x290 + * 213: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] + * ... + * 289: 48 85 ff test rdi,rdi + * 28c: 74 17 je 0x2a5 + * 28e: e9 7f ff ff ff jmp 0x212 + * 293: bf 03 00 00 00 mov edi,0x3 + * Note that insn at 0x211 is 2-byte cond jump insn for offset 0x7d (-125) + * and insn at 0x28e is 5-byte jmp insn with offset -129. + * + * pass5, final_proglen=4392: + * ... + * 20e: 48 85 ff test rdi,rdi + * 211: 0f 84 80 00 00 00 je 0x297 + * 217: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] + * ... + * 28d: 48 85 ff test rdi,rdi + * 290: 74 1a je 0x2ac + * 292: eb 84 jmp 0x218 + * 294: bf 03 00 00 00 mov edi,0x3 + * Note that insn at 0x211 is 6-byte cond jump insn now since its offset + * becomes 0x80 based on previous round (0x293 - 0x213 = 0x80). + * At the same time, insn at 0x292 is a 2-byte insn since its offset is + * -124. + * + * pass6 will repeat the same code as in pass4 and this will prevent + * eventual convergence. + * + * To fix this issue, we need to break je (2->6 bytes) <-> jmp (5->2 bytes) + * cycle in the above. In the above example je offset <= 0x7c should work. + * + * For other cases, je <-> je needs offset <= 0x7b to avoid no convergence + * issue. For jmp <-> je and jmp <-> jmp cases, jmp offset <= 0x7c should + * avoid no convergence issue. + * + * Overall, let us limit the positive offset for 8bit cond/uncond jmp insn + * to maximum 123 (0x7b). This way, the jit pass can eventually converge. + */ +static bool is_imm8_jmp_offset(int value) +{ + return value <= 123 && value >= -128; +} + static bool is_simm32(s64 value) { return value == (s64)(s32)value; @@ -2184,7 +2234,7 @@ st: if (is_imm8(insn->off)) return -EFAULT; } jmp_offset = addrs[i + insn->off] - addrs[i]; - if (is_imm8(jmp_offset)) { + if (is_imm8_jmp_offset(jmp_offset)) { if (jmp_padding) { /* To keep the jmp_offset valid, the extra bytes are * padded before the jump insn, so we subtract the @@ -2266,7 +2316,7 @@ st: if (is_imm8(insn->off)) break; } emit_jmp: - if (is_imm8(jmp_offset)) { + if (is_imm8_jmp_offset(jmp_offset)) { if (jmp_padding) { /* To avoid breaking jmp_offset, the extra bytes * are padded before the actual jmp insn, so
From: Artem Sadovnikov ancowi69@gmail.com
[ Upstream commit cc749e61c011c255d81b192a822db650c68b313f ]
Fuzzing reports a possible deadlock in jbd2_log_wait_commit.
This issue is triggered when an EXT4_IOC_MIGRATE ioctl is set to require synchronous updates because the file descriptor is opened with O_SYNC. This can lead to the jbd2_journal_stop() function calling jbd2_might_wait_for_commit(), potentially causing a deadlock if the EXT4_IOC_MIGRATE call races with a write(2) system call.
This problem only arises when CONFIG_PROVE_LOCKING is enabled. In this case, the jbd2_might_wait_for_commit macro locks jbd2_handle in the jbd2_journal_stop function while i_data_sem is locked. This triggers lockdep because the jbd2_journal_start function might also lock the same jbd2_handle simultaneously.
Found by Linux Verification Center (linuxtesting.org) with syzkaller.
Reviewed-by: Ritesh Harjani (IBM) ritesh.list@gmail.com Co-developed-by: Mikhail Ukhin mish.uxin2012@yandex.ru Signed-off-by: Mikhail Ukhin mish.uxin2012@yandex.ru Signed-off-by: Artem Sadovnikov ancowi69@gmail.com Rule: add Link: https://lore.kernel.org/stable/20240404095000.5872-1-mish.uxin2012%40yandex.... Link: https://patch.msgid.link/20240829152210.2754-1-ancowi69@gmail.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index d98ac2af8199f..a5e1492bbaaa5 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -663,8 +663,8 @@ int ext4_ind_migrate(struct inode *inode) if (unlikely(ret2 && !ret)) ret = ret2; errout: - ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); + ext4_journal_stop(handle); out_unlock: ext4_writepages_up_write(inode->i_sb, alloc_ctx); return ret;
From: Baokun Li libaokun1@huawei.com
[ Upstream commit 4e2524ba2ca5f54bdbb9e5153bea00421ef653f5 ]
In ext4_find_extent(), path may be freed by error or be reallocated, so using a previously saved *ppath may have been freed and thus may trigger use-after-free, as follows:
ext4_split_extent path = *ppath; ext4_split_extent_at(ppath) path = ext4_find_extent(ppath) ext4_split_extent_at(ppath) // ext4_find_extent fails to free path // but zeroout succeeds ext4_ext_show_leaf(inode, path) eh = path[depth].p_hdr // path use-after-free !!!
Similar to ext4_split_extent_at(), we use *ppath directly as an input to ext4_ext_show_leaf(). Fix a spelling error by the way.
Same problem in ext4_ext_handle_unwritten_extents(). Since 'path' is only used in ext4_ext_show_leaf(), remove 'path' and use *ppath directly.
This issue is triggered only when EXT_DEBUG is defined and therefore does not affect functionality.
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Ojaswin Mujoo ojaswin@linux.ibm.com Tested-by: Ojaswin Mujoo ojaswin@linux.ibm.com Link: https://patch.msgid.link/20240822023545.1994557-5-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/extents.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e067f2dd0335c..7954430f886d8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3287,7 +3287,7 @@ static int ext4_split_extent_at(handle_t *handle, }
/* - * ext4_split_extents() splits an extent and mark extent which is covered + * ext4_split_extent() splits an extent and mark extent which is covered * by @map as split_flags indicates * * It may result in splitting the extent into multiple extents (up to three) @@ -3363,7 +3363,7 @@ static int ext4_split_extent(handle_t *handle, goto out; }
- ext4_ext_show_leaf(inode, path); + ext4_ext_show_leaf(inode, *ppath); out: return err ? err : allocated; } @@ -3828,14 +3828,13 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_ext_path **ppath, int flags, unsigned int allocated, ext4_fsblk_t newblock) { - struct ext4_ext_path __maybe_unused *path = *ppath; int ret = 0; int err = 0;
ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n", (unsigned long long)map->m_lblk, map->m_len, flags, allocated); - ext4_ext_show_leaf(inode, path); + ext4_ext_show_leaf(inode, *ppath);
/* * When writing into unwritten space, we should not fail to @@ -3932,7 +3931,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, if (allocated > map->m_len) allocated = map->m_len; map->m_len = allocated; - ext4_ext_show_leaf(inode, path); + ext4_ext_show_leaf(inode, *ppath); out2: return err ? err : allocated; }
From: Thadeu Lima de Souza Cascardo cascardo@igalia.com
[ Upstream commit cd69f8f9de280e331c9e6ff689ced0a688a9ce8f ]
ext4_search_dir currently returns -1 in case of a failure, while it returns 0 when the name is not found. In such failure cases, it should return an error code instead.
This becomes even more important when ext4_find_inline_entry returns an error code as well in the next commit.
-EFSCORRUPTED seems appropriate as such error code as these failures would be caused by unexpected record lengths and is in line with other instances of ext4_check_dir_entry failures.
In the case of ext4_dx_find_entry, the current use of ERR_BAD_DX_DIR was left as is to reduce the risk of regressions.
Signed-off-by: Thadeu Lima de Souza Cascardo cascardo@igalia.com Link: https://patch.msgid.link/20240821152324.3621860-2-cascardo@igalia.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/namei.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6a95713f9193b..8af437ac30511 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1482,7 +1482,7 @@ static bool ext4_match(struct inode *parent, }
/* - * Returns 0 if not found, -1 on failure, and 1 on success + * Returns 0 if not found, -EFSCORRUPTED on failure, and 1 on success */ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, struct inode *dir, struct ext4_filename *fname, @@ -1503,7 +1503,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, * a full check */ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf, buf_size, offset)) - return -1; + return -EFSCORRUPTED; *res_dir = de; return 1; } @@ -1511,7 +1511,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, de_len = ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize); if (de_len <= 0) - return -1; + return -EFSCORRUPTED; offset += de_len; de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } @@ -1663,8 +1663,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, goto cleanup_and_exit; } else { brelse(bh); - if (i < 0) + if (i < 0) { + ret = ERR_PTR(i); goto cleanup_and_exit; + } } next: if (++block >= nblocks) @@ -1758,7 +1760,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, if (retval == 1) goto success; brelse(bh); - if (retval == -1) { + if (retval < 0) { bh = ERR_PTR(ERR_BAD_DX_DIR); goto errout; }
From: Pankaj Raghav p.raghav@samsung.com
[ Upstream commit 10553a91652d995274da63fc317470f703765081 ]
iomap_dio_zero() will pad a fs block with zeroes if the direct IO size < fs block size. iomap_dio_zero() has an implicit assumption that fs block size < page_size. This is true for most filesystems at the moment.
If the block size > page size, this will send the contents of the page next to zero page(as len > PAGE_SIZE) to the underlying block device, causing FS corruption.
iomap is a generic infrastructure and it should not make any assumptions about the fs block size and the page size of the system.
Signed-off-by: Pankaj Raghav p.raghav@samsung.com Link: https://lore.kernel.org/r/20240822135018.1931258-7-kernel@pankajraghav.com Reviewed-by: Hannes Reinecke hare@suse.de Reviewed-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Dave Chinner dchinner@redhat.com Reviewed-by: Daniel Gomez da.gomez@samsung.com Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/iomap/buffered-io.c | 4 ++-- fs/iomap/direct-io.c | 45 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f420c53d86acc..d745f718bcde8 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -2007,10 +2007,10 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, } EXPORT_SYMBOL_GPL(iomap_writepages);
-static int __init iomap_init(void) +static int __init iomap_buffered_init(void) { return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE), offsetof(struct iomap_ioend, io_bio), BIOSET_NEED_BVECS); } -fs_initcall(iomap_init); +fs_initcall(iomap_buffered_init); diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index f3b43d223a46e..c02b266bba525 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -11,6 +11,7 @@ #include <linux/iomap.h> #include <linux/backing-dev.h> #include <linux/uio.h> +#include <linux/set_memory.h> #include <linux/task_io_accounting_ops.h> #include "trace.h"
@@ -27,6 +28,13 @@ #define IOMAP_DIO_WRITE (1U << 30) #define IOMAP_DIO_DIRTY (1U << 31)
+/* + * Used for sub block zeroing in iomap_dio_zero() + */ +#define IOMAP_ZERO_PAGE_SIZE (SZ_64K) +#define IOMAP_ZERO_PAGE_ORDER (get_order(IOMAP_ZERO_PAGE_SIZE)) +static struct page *zero_page; + struct iomap_dio { struct kiocb *iocb; const struct iomap_dio_ops *dops; @@ -232,13 +240,20 @@ void iomap_dio_bio_end_io(struct bio *bio) } EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
-static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, +static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, loff_t pos, unsigned len) { struct inode *inode = file_inode(dio->iocb->ki_filp); - struct page *page = ZERO_PAGE(0); struct bio *bio;
+ if (!len) + return 0; + /* + * Max block size supported is 64k + */ + if (WARN_ON_ONCE(len > IOMAP_ZERO_PAGE_SIZE)) + return -EINVAL; + bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, GFP_KERNEL); @@ -246,8 +261,9 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io;
- __bio_add_page(bio, page, len, 0); + __bio_add_page(bio, zero_page, len, 0); iomap_dio_submit_bio(iter, dio, bio, pos); + return 0; }
/* @@ -356,8 +372,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, if (need_zeroout) { /* zero out from the start of the block to the write offset */ pad = pos & (fs_block_size - 1); - if (pad) - iomap_dio_zero(iter, dio, pos - pad, pad); + + ret = iomap_dio_zero(iter, dio, pos - pad, pad); + if (ret) + goto out; }
/* @@ -431,7 +449,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, /* zero out from the end of the write to the end of the block */ pad = pos & (fs_block_size - 1); if (pad) - iomap_dio_zero(iter, dio, pos, fs_block_size - pad); + ret = iomap_dio_zero(iter, dio, pos, + fs_block_size - pad); } out: /* Undo iter limitation to current extent */ @@ -753,3 +772,17 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, return iomap_dio_complete(dio); } EXPORT_SYMBOL_GPL(iomap_dio_rw); + +static int __init iomap_dio_init(void) +{ + zero_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, + IOMAP_ZERO_PAGE_ORDER); + + if (!zero_page) + return -ENOMEM; + + set_memory_ro((unsigned long)page_address(zero_page), + 1U << IOMAP_ZERO_PAGE_ORDER); + return 0; +} +fs_initcall(iomap_dio_init);
On Fri, Oct 04, 2024 at 02:16:31PM -0400, Sasha Levin wrote:
From: Pankaj Raghav p.raghav@samsung.com
[ Upstream commit 10553a91652d995274da63fc317470f703765081 ]
iomap_dio_zero() will pad a fs block with zeroes if the direct IO size < fs block size. iomap_dio_zero() has an implicit assumption that fs block size < page_size. This is true for most filesystems at the moment.
If the block size > page size, this will send the contents of the page next to zero page(as len > PAGE_SIZE) to the underlying block device, causing FS corruption.
iomap is a generic infrastructure and it should not make any assumptions about the fs block size and the page size of the system.
Signed-off-by: Pankaj Raghav p.raghav@samsung.com Link: https://lore.kernel.org/r/20240822135018.1931258-7-kernel@pankajraghav.com Reviewed-by: Hannes Reinecke hare@suse.de Reviewed-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Dave Chinner dchinner@redhat.com Reviewed-by: Daniel Gomez da.gomez@samsung.com Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org
fs/iomap/buffered-io.c | 4 ++-- fs/iomap/direct-io.c | 45 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 8 deletions(-)
For the second time: NACK to this patch for -all- LTS kernels.
It is a support patch for a new feature introduced in 6.12-rc1 - it is *not* a bug fix, it is not in any way relevant to LTS kernels, and it will *break some architectures* as it stands.
-Dave.
From: Jan Kara jack@suse.cz
[ Upstream commit d3476f3dad4ad68ae5f6b008ea6591d1520da5d8 ]
When the filesystem is mounted with errors=remount-ro, we were setting SB_RDONLY flag to stop all filesystem modifications. We knew this misses proper locking (sb->s_umount) and does not go through proper filesystem remount procedure but it has been the way this worked since early ext2 days and it was good enough for catastrophic situation damage mitigation. Recently, syzbot has found a way (see link) to trigger warnings in filesystem freezing because the code got confused by SB_RDONLY changing under its hands. Since these days we set EXT4_FLAGS_SHUTDOWN on the superblock which is enough to stop all filesystem modifications, modifying SB_RDONLY shouldn't be needed. So stop doing that.
Link: https://lore.kernel.org/all/000000000000b90a8e061e21d12f@google.com Reported-by: Christian Brauner brauner@kernel.org Signed-off-by: Jan Kara jack@suse.cz Reviewed-by: Christian Brauner brauner@kernel.org Link: https://patch.msgid.link/20240805201241.27286-1-jack@suse.cz Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/super.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e72145c4ae5a0..93c016b186c07 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -735,11 +735,12 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * EXT4_FLAGS_SHUTDOWN was set which stops all filesystem + * modifications. We don't set SB_RDONLY because that requires + * sb->s_umount semaphore and setting it without proper remount + * procedure is confusing code such as freeze_super() leading to + * deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; }
static void update_super_work(struct work_struct *work)
From: Wojciech Gładysz wojciech.gladysz@infogain.com
[ Upstream commit d1bc560e9a9c78d0b2314692847fc8661e0aeb99 ]
Add nested locking with I_MUTEX_XATTR subclass to avoid lockdep warning while handling xattr inode on file open syscall at ext4_xattr_inode_iget.
Backtrace EXT4-fs (loop0): Ignoring removed oldalloc option ====================================================== WARNING: possible circular locking dependency detected 5.10.0-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor543/2794 is trying to acquire lock: ffff8880215e1a48 (&ea_inode->i_rwsem#7/1){+.+.}-{3:3}, at: inode_lock include/linux/fs.h:782 [inline] ffff8880215e1a48 (&ea_inode->i_rwsem#7/1){+.+.}-{3:3}, at: ext4_xattr_inode_iget+0x42a/0x5c0 fs/ext4/xattr.c:425
but task is already holding lock: ffff8880215e3278 (&ei->i_data_sem/3){++++}-{3:3}, at: ext4_setattr+0x136d/0x19c0 fs/ext4/inode.c:5559
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&ei->i_data_sem/3){++++}-{3:3}: lock_acquire+0x197/0x480 kernel/locking/lockdep.c:5566 down_write+0x93/0x180 kernel/locking/rwsem.c:1564 ext4_update_i_disksize fs/ext4/ext4.h:3267 [inline] ext4_xattr_inode_write fs/ext4/xattr.c:1390 [inline] ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1538 [inline] ext4_xattr_set_entry+0x331a/0x3d80 fs/ext4/xattr.c:1662 ext4_xattr_ibody_set+0x124/0x390 fs/ext4/xattr.c:2228 ext4_xattr_set_handle+0xc27/0x14e0 fs/ext4/xattr.c:2385 ext4_xattr_set+0x219/0x390 fs/ext4/xattr.c:2498 ext4_xattr_user_set+0xc9/0xf0 fs/ext4/xattr_user.c:40 __vfs_setxattr+0x404/0x450 fs/xattr.c:177 __vfs_setxattr_noperm+0x11d/0x4f0 fs/xattr.c:208 __vfs_setxattr_locked+0x1f9/0x210 fs/xattr.c:266 vfs_setxattr+0x112/0x2c0 fs/xattr.c:283 setxattr+0x1db/0x3e0 fs/xattr.c:548 path_setxattr+0x15a/0x240 fs/xattr.c:567 __do_sys_setxattr fs/xattr.c:582 [inline] __se_sys_setxattr fs/xattr.c:578 [inline] __x64_sys_setxattr+0xc5/0xe0 fs/xattr.c:578 do_syscall_64+0x6d/0xa0 arch/x86/entry/common.c:62 entry_SYSCALL_64_after_hwframe+0x61/0xcb
-> #0 (&ea_inode->i_rwsem#7/1){+.+.}-{3:3}: check_prev_add kernel/locking/lockdep.c:2988 [inline] check_prevs_add kernel/locking/lockdep.c:3113 [inline] validate_chain+0x1695/0x58f0 kernel/locking/lockdep.c:3729 __lock_acquire+0x12fd/0x20d0 kernel/locking/lockdep.c:4955 lock_acquire+0x197/0x480 kernel/locking/lockdep.c:5566 down_write+0x93/0x180 kernel/locking/rwsem.c:1564 inode_lock include/linux/fs.h:782 [inline] ext4_xattr_inode_iget+0x42a/0x5c0 fs/ext4/xattr.c:425 ext4_xattr_inode_get+0x138/0x410 fs/ext4/xattr.c:485 ext4_xattr_move_to_block fs/ext4/xattr.c:2580 [inline] ext4_xattr_make_inode_space fs/ext4/xattr.c:2682 [inline] ext4_expand_extra_isize_ea+0xe70/0x1bb0 fs/ext4/xattr.c:2774 __ext4_expand_extra_isize+0x304/0x3f0 fs/ext4/inode.c:5898 ext4_try_to_expand_extra_isize fs/ext4/inode.c:5941 [inline] __ext4_mark_inode_dirty+0x591/0x810 fs/ext4/inode.c:6018 ext4_setattr+0x1400/0x19c0 fs/ext4/inode.c:5562 notify_change+0xbb6/0xe60 fs/attr.c:435 do_truncate+0x1de/0x2c0 fs/open.c:64 handle_truncate fs/namei.c:2970 [inline] do_open fs/namei.c:3311 [inline] path_openat+0x29f3/0x3290 fs/namei.c:3425 do_filp_open+0x20b/0x450 fs/namei.c:3452 do_sys_openat2+0x124/0x460 fs/open.c:1207 do_sys_open fs/open.c:1223 [inline] __do_sys_open fs/open.c:1231 [inline] __se_sys_open fs/open.c:1227 [inline] __x64_sys_open+0x221/0x270 fs/open.c:1227 do_syscall_64+0x6d/0xa0 arch/x86/entry/common.c:62 entry_SYSCALL_64_after_hwframe+0x61/0xcb
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(&ei->i_data_sem/3); lock(&ea_inode->i_rwsem#7/1); lock(&ei->i_data_sem/3); lock(&ea_inode->i_rwsem#7/1);
*** DEADLOCK ***
5 locks held by syz-executor543/2794: #0: ffff888026fbc448 (sb_writers#4){.+.+}-{0:0}, at: mnt_want_write+0x4a/0x2a0 fs/namespace.c:365 #1: ffff8880215e3488 (&sb->s_type->i_mutex_key#7){++++}-{3:3}, at: inode_lock include/linux/fs.h:782 [inline] #1: ffff8880215e3488 (&sb->s_type->i_mutex_key#7){++++}-{3:3}, at: do_truncate+0x1cf/0x2c0 fs/open.c:62 #2: ffff8880215e3310 (&ei->i_mmap_sem){++++}-{3:3}, at: ext4_setattr+0xec4/0x19c0 fs/ext4/inode.c:5519 #3: ffff8880215e3278 (&ei->i_data_sem/3){++++}-{3:3}, at: ext4_setattr+0x136d/0x19c0 fs/ext4/inode.c:5559 #4: ffff8880215e30c8 (&ei->xattr_sem){++++}-{3:3}, at: ext4_write_trylock_xattr fs/ext4/xattr.h:162 [inline] #4: ffff8880215e30c8 (&ei->xattr_sem){++++}-{3:3}, at: ext4_try_to_expand_extra_isize fs/ext4/inode.c:5938 [inline] #4: ffff8880215e30c8 (&ei->xattr_sem){++++}-{3:3}, at: __ext4_mark_inode_dirty+0x4fb/0x810 fs/ext4/inode.c:6018
stack backtrace: CPU: 1 PID: 2794 Comm: syz-executor543 Not tainted 5.10.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x177/0x211 lib/dump_stack.c:118 print_circular_bug+0x146/0x1b0 kernel/locking/lockdep.c:2002 check_noncircular+0x2cc/0x390 kernel/locking/lockdep.c:2123 check_prev_add kernel/locking/lockdep.c:2988 [inline] check_prevs_add kernel/locking/lockdep.c:3113 [inline] validate_chain+0x1695/0x58f0 kernel/locking/lockdep.c:3729 __lock_acquire+0x12fd/0x20d0 kernel/locking/lockdep.c:4955 lock_acquire+0x197/0x480 kernel/locking/lockdep.c:5566 down_write+0x93/0x180 kernel/locking/rwsem.c:1564 inode_lock include/linux/fs.h:782 [inline] ext4_xattr_inode_iget+0x42a/0x5c0 fs/ext4/xattr.c:425 ext4_xattr_inode_get+0x138/0x410 fs/ext4/xattr.c:485 ext4_xattr_move_to_block fs/ext4/xattr.c:2580 [inline] ext4_xattr_make_inode_space fs/ext4/xattr.c:2682 [inline] ext4_expand_extra_isize_ea+0xe70/0x1bb0 fs/ext4/xattr.c:2774 __ext4_expand_extra_isize+0x304/0x3f0 fs/ext4/inode.c:5898 ext4_try_to_expand_extra_isize fs/ext4/inode.c:5941 [inline] __ext4_mark_inode_dirty+0x591/0x810 fs/ext4/inode.c:6018 ext4_setattr+0x1400/0x19c0 fs/ext4/inode.c:5562 notify_change+0xbb6/0xe60 fs/attr.c:435 do_truncate+0x1de/0x2c0 fs/open.c:64 handle_truncate fs/namei.c:2970 [inline] do_open fs/namei.c:3311 [inline] path_openat+0x29f3/0x3290 fs/namei.c:3425 do_filp_open+0x20b/0x450 fs/namei.c:3452 do_sys_openat2+0x124/0x460 fs/open.c:1207 do_sys_open fs/open.c:1223 [inline] __do_sys_open fs/open.c:1231 [inline] __se_sys_open fs/open.c:1227 [inline] __x64_sys_open+0x221/0x270 fs/open.c:1227 do_syscall_64+0x6d/0xa0 arch/x86/entry/common.c:62 entry_SYSCALL_64_after_hwframe+0x61/0xcb RIP: 0033:0x7f0cde4ea229 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 21 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd81d1c978 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 RAX: ffffffffffffffda RBX: 0030656c69662f30 RCX: 00007f0cde4ea229 RDX: 0000000000000089 RSI: 00000000000a0a00 RDI: 00000000200001c0 RBP: 2f30656c69662f2e R08: 0000000000208000 R09: 0000000000208000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd81d1c9c0 R13: 00007ffd81d1ca00 R14: 0000000000080000 R15: 0000000000000003 EXT4-fs error (device loop0): ext4_expand_extra_isize_ea:2730: inode #13: comm syz-executor543: corrupted in-inode xattr
Signed-off-by: Wojciech Gładysz wojciech.gladysz@infogain.com Link: https://patch.msgid.link/20240801143827.19135-1-wojciech.gladysz@infogain.co... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 46ce2f21fef9d..a6bc9a22a8afc 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -458,7 +458,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE); ext4_xattr_inode_set_ref(inode, 1); } else { - inode_lock(inode); + inode_lock_nested(inode, I_MUTEX_XATTR); inode->i_flags |= S_NOQUOTA; inode_unlock(inode); } @@ -1039,7 +1039,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, s64 ref_count; int ret;
- inode_lock(ea_inode); + inode_lock_nested(ea_inode, I_MUTEX_XATTR);
ret = ext4_reserve_inode_write(handle, ea_inode, &iloc); if (ret)
From: Thomas Richter tmricht@linux.ibm.com
[ Upstream commit b495e710157606889f2d8bdc62aebf2aa02f67a7 ]
Remove WARN_ON_ONCE statements. These have not triggered in the past.
Signed-off-by: Thomas Richter tmricht@linux.ibm.com Acked-by: Sumanth Korikkar sumanthk@linux.ibm.com Cc: Heiko Carstens hca@linux.ibm.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Alexander Gordeev agordeev@linux.ibm.com Signed-off-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kernel/perf_cpum_sf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 736c1d9632dd5..48fa9471660a8 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1463,7 +1463,7 @@ static int aux_output_begin(struct perf_output_handle *handle, unsigned long range, i, range_scan, idx, head, base, offset; struct hws_trailer_entry *te;
- if (WARN_ON_ONCE(handle->head & ~PAGE_MASK)) + if (handle->head & ~PAGE_MASK) return -EINVAL;
aux->head = handle->head >> PAGE_SHIFT; @@ -1642,7 +1642,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) unsigned long num_sdb;
aux = perf_get_aux(handle); - if (WARN_ON_ONCE(!aux)) + if (!aux) return;
/* Inform user space new data arrived */ @@ -1661,7 +1661,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) num_sdb); break; } - if (WARN_ON_ONCE(!aux)) + if (!aux) return;
/* Update head and alert_mark to new position */ @@ -1896,12 +1896,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + if (!(event->hw.state & PERF_HES_STOPPED)) return; - - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - perf_pmu_disable(event->pmu); event->hw.state = 0; cpuhw->lsctl.cs = 1;
From: Lizhi Xu lizhi.xu@windriver.com
[ Upstream commit 985b67cd86392310d9e9326de941c22fc9340eec ]
When mounting the ext4 filesystem, if the default hash version is set to DX_HASH_SIPHASH but the casefold feature is not set, exit the mounting.
Reported-by: syzbot+340581ba9dceb7e06fb3@syzkaller.appspotmail.com Signed-off-by: Lizhi Xu lizhi.xu@windriver.com Link: https://patch.msgid.link/20240605012335.44086-1-lizhi.xu@windriver.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/super.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 93c016b186c07..e8e32cf3e2228 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3583,6 +3583,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) "mounted without CONFIG_UNICODE"); return 0; } + if (EXT4_SB(sb)->s_es->s_def_hash_version == DX_HASH_SIPHASH && + !ext4_has_feature_casefold(sb)) { + ext4_msg(sb, KERN_ERR, + "Filesystem without casefold feature cannot be " + "mounted with siphash"); + return 0; + }
if (readonly) return 1;
From: Heiko Carstens hca@linux.ibm.com
[ Upstream commit 3c4d0ae0671827f4b536cc2d26f8b9c54584ccc5 ]
Add missing warning handling to the early program check handler. This way a warning is printed to the console as soon as the early console is setup, and the kernel continues to boot.
Before this change a disabled wait psw was loaded instead and the machine was silently stopped without giving an idea about what happened.
Reviewed-by: Sven Schnelle svens@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kernel/early.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 14d324865e33f..ecae246e8d5d4 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -177,8 +177,21 @@ static __init void setup_topology(void)
void __do_early_pgm_check(struct pt_regs *regs) { - if (!fixup_exception(regs)) - disabled_wait(); + struct lowcore *lc = get_lowcore(); + unsigned long ip; + + regs->int_code = lc->pgm_int_code; + regs->int_parm_long = lc->trans_exc_code; + ip = __rewind_psw(regs->psw, regs->int_code >> 16); + + /* Monitor Event? Might be a warning */ + if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) { + if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN) + return; + } + if (fixup_exception(regs)) + return; + disabled_wait(); }
static noinline __init void setup_lowcore_early(void)
From: Xu Kuohai xukuohai@huawei.com
[ Upstream commit 28ead3eaabc16ecc907cfb71876da028080f6356 ]
bpf progs can be attached to kernel functions, and the attached functions can take different parameters or return different return values. If prog attached to one kernel function tail calls prog attached to another kernel function, the ctx access or return value verification could be bypassed.
For example, if prog1 is attached to func1 which takes only 1 parameter and prog2 is attached to func2 which takes two parameters. Since verifier assumes the bpf ctx passed to prog2 is constructed based on func2's prototype, verifier allows prog2 to access the second parameter from the bpf ctx passed to it. The problem is that verifier does not prevent prog1 from passing its bpf ctx to prog2 via tail call. In this case, the bpf ctx passed to prog2 is constructed from func1 instead of func2, that is, the assumption for ctx access verification is bypassed.
Another example, if BPF LSM prog1 is attached to hook file_alloc_security, and BPF LSM prog2 is attached to hook bpf_lsm_audit_rule_known. Verifier knows the return value rules for these two hooks, e.g. it is legal for bpf_lsm_audit_rule_known to return positive number 1, and it is illegal for file_alloc_security to return positive number. So verifier allows prog2 to return positive number 1, but does not allow prog1 to return positive number. The problem is that verifier does not prevent prog1 from calling prog2 via tail call. In this case, prog2's return value 1 will be used as the return value for prog1's hook file_alloc_security. That is, the return value rule is bypassed.
This patch adds restriction for tail call to prevent such bypasses.
Signed-off-by: Xu Kuohai xukuohai@huawei.com Link: https://lore.kernel.org/r/20240719110059.797546-4-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Andrii Nakryiko andrii@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3b94ec161e8cc..46873d9d86494 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -294,6 +294,7 @@ struct bpf_map { * same prog type, JITed flag and xdp_has_frags flag. */ struct { + const struct btf_type *attach_func_proto; spinlock_t lock; enum bpf_prog_type type; bool jited; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7ee62e38faf0e..4e07cc057d6f2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2302,6 +2302,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map, { enum bpf_prog_type prog_type = resolve_prog_type(fp); bool ret; + struct bpf_prog_aux *aux = fp->aux;
if (fp->kprobe_override) return false; @@ -2311,7 +2312,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map, * in the case of devmap and cpumap). Until device checks * are implemented, prohibit adding dev-bound programs to program maps. */ - if (bpf_prog_is_dev_bound(fp->aux)) + if (bpf_prog_is_dev_bound(aux)) return false;
spin_lock(&map->owner.lock); @@ -2321,12 +2322,26 @@ bool bpf_prog_map_compatible(struct bpf_map *map, */ map->owner.type = prog_type; map->owner.jited = fp->jited; - map->owner.xdp_has_frags = fp->aux->xdp_has_frags; + map->owner.xdp_has_frags = aux->xdp_has_frags; + map->owner.attach_func_proto = aux->attach_func_proto; ret = true; } else { ret = map->owner.type == prog_type && map->owner.jited == fp->jited && - map->owner.xdp_has_frags == fp->aux->xdp_has_frags; + map->owner.xdp_has_frags == aux->xdp_has_frags; + if (ret && + map->owner.attach_func_proto != aux->attach_func_proto) { + switch (prog_type) { + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_STRUCT_OPS: + ret = false; + break; + default: + break; + } + } } spin_unlock(&map->owner.lock);
From: Daniel Jordan daniel.m.jordan@oracle.com
[ Upstream commit 2351e8c65404aabc433300b6bf90c7a37e8bbc4d ]
Some distros have grub2 config files with the lines
if [ x"${feature_menuentry_id}" = xy ]; then menuentry_id_option="--id" else menuentry_id_option="" fi
which match the skip regex defined for grub2 in get_grub_index():
$skip = '^\s*menuentry';
These false positives cause the grub number to be higher than it should be, and the wrong kernel can end up booting.
Grub documents the menuentry command with whitespace between it and the title, so make the skip regex reflect this.
Link: https://lore.kernel.org/20240904175530.84175-1-daniel.m.jordan@oracle.com Signed-off-by: Daniel Jordan daniel.m.jordan@oracle.com Acked-by: John 'Warthog9' Hawley (Tenstorrent) warthog9@eaglescrag.net Signed-off-by: Steven Rostedt rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index eb31cd9c977bf..e24cd825e70a6 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2047,7 +2047,7 @@ sub get_grub_index { } elsif ($reboot_type eq "grub2") { $command = "cat $grub_file"; $target = '^\s*menuentry.*' . $grub_menu_qt; - $skip = '^\s*menuentry'; + $skip = '^\s*menuentry\s'; $submenu = '^\s*submenu\s'; } elsif ($reboot_type eq "grub2bls") { $command = $grub_bls_get;
From: Saravanan Vajravel saravanan.vajravel@broadcom.com
[ Upstream commit 2a777679b8ccd09a9a65ea0716ef10365179caac ]
Current timeout handler of mad agent acquires/releases mad_agent_priv lock for every timed out WRs. This causes heavy locking contention when higher no. of WRs are to be handled inside timeout handler.
This leads to softlockup with below trace in some use cases where rdma-cm path is used to establish connection between peer nodes
Trace: ----- BUG: soft lockup - CPU#4 stuck for 26s! [kworker/u128:3:19767] CPU: 4 PID: 19767 Comm: kworker/u128:3 Kdump: loaded Tainted: G OE ------- --- 5.14.0-427.13.1.el9_4.x86_64 #1 Hardware name: Dell Inc. PowerEdge R740/01YM03, BIOS 2.4.8 11/26/2019 Workqueue: ib_mad1 timeout_sends [ib_core] RIP: 0010:__do_softirq+0x78/0x2ac RSP: 0018:ffffb253449e4f98 EFLAGS: 00000246 RAX: 00000000ffffffff RBX: 0000000000000000 RCX: 000000000000001f RDX: 000000000000001d RSI: 000000003d1879ab RDI: fff363b66fd3a86b RBP: ffffb253604cbcd8 R08: 0000009065635f3b R09: 0000000000000000 R10: 0000000000000040 R11: ffffb253449e4ff8 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000040 FS: 0000000000000000(0000) GS:ffff8caa1fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd9ec9db900 CR3: 0000000891934006 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <IRQ> ? show_trace_log_lvl+0x1c4/0x2df ? show_trace_log_lvl+0x1c4/0x2df ? __irq_exit_rcu+0xa1/0xc0 ? watchdog_timer_fn+0x1b2/0x210 ? __pfx_watchdog_timer_fn+0x10/0x10 ? __hrtimer_run_queues+0x127/0x2c0 ? hrtimer_interrupt+0xfc/0x210 ? __sysvec_apic_timer_interrupt+0x5c/0x110 ? sysvec_apic_timer_interrupt+0x37/0x90 ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? __do_softirq+0x78/0x2ac ? __do_softirq+0x60/0x2ac __irq_exit_rcu+0xa1/0xc0 sysvec_call_function_single+0x72/0x90 </IRQ> <TASK> asm_sysvec_call_function_single+0x16/0x20 RIP: 0010:_raw_spin_unlock_irq+0x14/0x30 RSP: 0018:ffffb253604cbd88 EFLAGS: 00000247 RAX: 000000000001960d RBX: 0000000000000002 RCX: ffff8cad2a064800 RDX: 000000008020001b RSI: 0000000000000001 RDI: ffff8cad5d39f66c RBP: ffff8cad5d39f600 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8caa443e0c00 R11: ffffb253604cbcd8 R12: ffff8cacb8682538 R13: 0000000000000005 R14: ffffb253604cbd90 R15: ffff8cad5d39f66c cm_process_send_error+0x122/0x1d0 [ib_cm] timeout_sends+0x1dd/0x270 [ib_core] process_one_work+0x1e2/0x3b0 ? __pfx_worker_thread+0x10/0x10 worker_thread+0x50/0x3a0 ? __pfx_worker_thread+0x10/0x10 kthread+0xdd/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x50 </TASK>
Simplified timeout handler by creating local list of timed out WRs and invoke send handler post creating the list. The new method acquires/ releases lock once to fetch the list and hence helps to reduce locking contetiong when processing higher no. of WRs
Signed-off-by: Saravanan Vajravel saravanan.vajravel@broadcom.com Link: https://lore.kernel.org/r/20240722110325.195085-1-saravanan.vajravel@broadco... Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/core/mad.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 7439e47ff951d..70708fea12962 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2616,14 +2616,16 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
static void timeout_sends(struct work_struct *work) { + struct ib_mad_send_wr_private *mad_send_wr, *n; struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; + struct list_head local_list; unsigned long flags, delay;
mad_agent_priv = container_of(work, struct ib_mad_agent_private, timed_work.work); mad_send_wc.vendor_err = 0; + INIT_LIST_HEAD(&local_list);
spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->wait_list)) { @@ -2641,13 +2643,16 @@ static void timeout_sends(struct work_struct *work) break; }
- list_del(&mad_send_wr->agent_list); + list_del_init(&mad_send_wr->agent_list); if (mad_send_wr->status == IB_WC_SUCCESS && !retry_send(mad_send_wr)) continue;
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + list_add_tail(&mad_send_wr->agent_list, &local_list); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ list_for_each_entry_safe(mad_send_wr, n, &local_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else @@ -2655,11 +2660,8 @@ static void timeout_sends(struct work_struct *work) mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - deref_mad_agent(mad_agent_priv); - spin_lock_irqsave(&mad_agent_priv->lock, flags); } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); }
/*
From: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com
[ Upstream commit 5aedb8d8336b0a0421b58ca27d1b572aa6695b5b ]
The existing code enables the Cadence IP interrupts after the bus reset sequence. The problem with this sequence is that it might be pre-empted, giving SoundWire devices time to sync and report as ATTACHED before the interrupts are enabled. In that case, the Cadence IP will not detect a state change and will not throw an interrupt to proceed with the enumeration of a Device0.
In our overnight stress tests, we observed that a slight sub-millisecond delay in enabling interrupts after the reset was correlated with detection failures. This problem is more prevalent on the LunarLake silicon, likely due to SOC integration changes, but it was observed on earlier generations as well.
This patch reverts the sequence, with the interrupts enabled before performing the bus reset. This removes the race condition and makes sure the Cadence IP is able to detect the presence of a Device0 in all cases.
Signed-off-by: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com Signed-off-by: Bard Liao yung-chuan.liao@linux.intel.com Link: https://lore.kernel.org/r/20240805115003.88035-1-yung-chuan.liao@linux.intel... Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/soundwire/intel_bus_common.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/soundwire/intel_bus_common.c b/drivers/soundwire/intel_bus_common.c index df944e11b9caa..a75e5d7d87154 100644 --- a/drivers/soundwire/intel_bus_common.c +++ b/drivers/soundwire/intel_bus_common.c @@ -45,15 +45,15 @@ int intel_start_bus(struct sdw_intel *sdw) return ret; }
- ret = sdw_cdns_exit_reset(cdns); + ret = sdw_cdns_enable_interrupt(cdns, true); if (ret < 0) { - dev_err(dev, "%s: unable to exit bus reset sequence: %d\n", __func__, ret); + dev_err(dev, "%s: cannot enable interrupts: %d\n", __func__, ret); return ret; }
- ret = sdw_cdns_enable_interrupt(cdns, true); + ret = sdw_cdns_exit_reset(cdns); if (ret < 0) { - dev_err(dev, "%s: cannot enable interrupts: %d\n", __func__, ret); + dev_err(dev, "%s: unable to exit bus reset sequence: %d\n", __func__, ret); return ret; }
@@ -136,15 +136,15 @@ int intel_start_bus_after_reset(struct sdw_intel *sdw) return ret; }
- ret = sdw_cdns_exit_reset(cdns); + ret = sdw_cdns_enable_interrupt(cdns, true); if (ret < 0) { - dev_err(dev, "unable to exit bus reset sequence during resume\n"); + dev_err(dev, "cannot enable interrupts during resume\n"); return ret; }
- ret = sdw_cdns_enable_interrupt(cdns, true); + ret = sdw_cdns_exit_reset(cdns); if (ret < 0) { - dev_err(dev, "cannot enable interrupts during resume\n"); + dev_err(dev, "unable to exit bus reset sequence during resume\n"); return ret; }
From: WangYuli wangyuli@uniontech.com
[ Upstream commit 9246b487ab3c3b5993aae7552b7a4c541cc14a49 ]
Add DMA support for audio function of Glenfly Arise chip, which uses Requester ID of function 0.
Link: https://lore.kernel.org/r/CA2BBD087345B6D1+20240823095708.3237375-1-wangyuli... Signed-off-by: SiyuLi siyuli@glenfly.com Signed-off-by: WangYuli wangyuli@uniontech.com [bhelgaas: lower-case hex to match local code, drop unused Device IDs] Signed-off-by: Bjorn Helgaas bhelgaas@google.com Reviewed-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/quirks.c | 4 ++++ include/linux/pci_ids.h | 2 ++ sound/pci/hda/hda_intel.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a2ce4e08edf5a..cc6c82c3bd3d0 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4246,6 +4246,10 @@ static void quirk_dma_func0_alias(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe832, quirk_dma_func0_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe476, quirk_dma_func0_alias);
+/* Some Glenfly chips use function 0 as the PCIe Requester ID for DMA */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_GLENFLY, 0x3d40, quirk_dma_func0_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_GLENFLY, 0x3d41, quirk_dma_func0_alias); + static void quirk_dma_func1_alias(struct pci_dev *dev) { if (PCI_FUNC(dev->devfn) != 1) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc27..2c94d4004dd50 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2661,6 +2661,8 @@ #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 #define PCI_DEVICE_ID_DCI_PCCOM2 0x0004
+#define PCI_VENDOR_ID_GLENFLY 0x6766 + #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 #define PCI_DEVICE_ID_INTEL_HDA_CML_LP 0x02c8 diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 97d33a48ff17c..c98b1821b0620 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2679,7 +2679,7 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | AZX_DCAPS_PM_RUNTIME }, /* GLENFLY */ - { PCI_DEVICE(0x6766, PCI_ANY_ID), + { PCI_DEVICE(PCI_VENDOR_ID_GLENFLY, PCI_ANY_ID), .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8, .class_mask = 0xffffff, .driver_data = AZX_DRIVER_GFHDMI | AZX_DCAPS_POSFIX_LPIB |
From: Md Haris Iqbal haris.iqbal@ionos.com
[ Upstream commit d0e62bf7b575fbfe591f6f570e7595dd60a2f5eb ]
For RTRS path establishment, RTRS client initiates and completes con_num of connections. After establishing all its connections, the information is exchanged between the client and server through the info_req message. During this exchange, it is essential that all connections have been established, and the state of the RTRS srv path is CONNECTED.
So add these sanity checks, to make sure we detect and abort process in error scenarios to avoid null pointer deref.
Signed-off-by: Md Haris Iqbal haris.iqbal@ionos.com Signed-off-by: Jack Wang jinpu.wang@ionos.com Signed-off-by: Grzegorz Prajsner grzegorz.prajsner@ionos.com Link: https://patch.msgid.link/20240821112217.41827-9-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 1d33efb8fb03b..e9835a1666d3a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -931,12 +931,11 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) if (err) goto close;
-out: rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); return; close: + rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); close_path(srv_path); - goto out; }
static int post_recv_info_req(struct rtrs_srv_con *con) @@ -987,6 +986,16 @@ static int post_recv_path(struct rtrs_srv_path *srv_path) q_size = SERVICE_CON_QUEUE_DEPTH; else q_size = srv->queue_depth; + if (srv_path->state != RTRS_SRV_CONNECTING) { + rtrs_err(s, "Path state invalid. state %s\n", + rtrs_srv_state_str(srv_path->state)); + return -EIO; + } + + if (!srv_path->s.con[cid]) { + rtrs_err(s, "Conn not set for %d\n", cid); + return -EIO; + }
err = post_recv_io(to_srv_con(srv_path->s.con[cid]), q_size); if (err) {
From: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org
[ Upstream commit f92d67e23b8caa81f6322a2bad1d633b00ca000e ]
Driver code is leaking OF node reference from of_get_parent() in bcm53573_ilp_init(). Usage of of_get_parent() is not needed in the first place, because the parent node will not be freed while we are processing given node (triggered by CLK_OF_DECLARE()). Thus fix the leak by accessing parent directly, instead of of_get_parent().
Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org Link: https://lore.kernel.org/r/20240826065801.17081-1-krzysztof.kozlowski@linaro.... Signed-off-by: Stephen Boyd sboyd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/bcm/clk-bcm53573-ilp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/clk/bcm/clk-bcm53573-ilp.c b/drivers/clk/bcm/clk-bcm53573-ilp.c index 84f2af736ee8a..83ef41d618be3 100644 --- a/drivers/clk/bcm/clk-bcm53573-ilp.c +++ b/drivers/clk/bcm/clk-bcm53573-ilp.c @@ -112,7 +112,7 @@ static void bcm53573_ilp_init(struct device_node *np) goto err_free_ilp; }
- ilp->regmap = syscon_node_to_regmap(of_get_parent(np)); + ilp->regmap = syscon_node_to_regmap(np->parent); if (IS_ERR(ilp->regmap)) { err = PTR_ERR(ilp->regmap); goto err_free_ilp;
From: Subramanian Ananthanarayanan quic_skananth@quicinc.com
[ Upstream commit 026f84d3fa62d215b11cbeb5a5d97df941e93b5c ]
The Qualcomm SA8775P root ports don't advertise an ACS capability, but they do provide ACS-like features to disable peer transactions and validate bus numbers in requests.
Thus, add an ACS quirk for the SA8775P.
Link: https://lore.kernel.org/linux-pci/20240906052228.1829485-1-quic_skananth@qui... Signed-off-by: Subramanian Ananthanarayanan quic_skananth@quicinc.com Signed-off-by: Krzysztof Wilczyński kwilczynski@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index cc6c82c3bd3d0..8502f8a27f239 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5074,6 +5074,8 @@ static const struct pci_dev_acs_enabled { /* QCOM QDF2xxx root ports */ { PCI_VENDOR_ID_QCOM, 0x0400, pci_quirk_qcom_rp_acs }, { PCI_VENDOR_ID_QCOM, 0x0401, pci_quirk_qcom_rp_acs }, + /* QCOM SA8775P root port */ + { PCI_VENDOR_ID_QCOM, 0x0115, pci_quirk_qcom_rp_acs }, /* HXT SD4800 root ports. The ACS design is same as QCOM QDF2xxx */ { PCI_VENDOR_ID_HXT, 0x0401, pci_quirk_qcom_rp_acs }, /* Intel PCH root ports */
From: Hans de Goede hdegoede@redhat.com
[ Upstream commit 43457ada98c824f310adb7bd96bd5f2fcd9a3279 ]
On chipsets with a second 'Integrated Device Function' SMBus controller use a different adapter-name for the second IDF adapter.
This allows platform glue code which is looking for the primary i801 adapter to manually instantiate i2c_clients on to differentiate between the 2.
This allows such code to find the primary i801 adapter by name, without needing to duplicate the PCI-ids to feature-flags mapping from i2c-i801.c.
Reviewed-by: Pali Rohár pali@kernel.org Signed-off-by: Hans de Goede hdegoede@redhat.com Acked-by: Wolfram Sang wsa+renesas@sang-engineering.com Signed-off-by: Andi Shyti andi.shyti@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-i801.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 328c0dab6b147..299fe9d3afab0 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1763,8 +1763,15 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
i801_add_tco(priv);
+ /* + * adapter.name is used by platform code to find the main I801 adapter + * to instantiante i2c_clients, do not change. + */ snprintf(priv->adapter.name, sizeof(priv->adapter.name), - "SMBus I801 adapter at %04lx", priv->smba); + "SMBus %s adapter at %04lx", + (priv->features & FEATURE_IDF) ? "I801 IDF" : "I801", + priv->smba); + err = i2c_add_adapter(&priv->adapter); if (err) { platform_device_unregister(priv->tco_pdev);
From: Alex Williamson alex.williamson@redhat.com
[ Upstream commit 2910306655a7072640021563ec9501bfa67f0cb1 ]
Per user reports, the Creative Labs EMU20k2 (Sound Blaster X-Fi Titanium Series) generates spurious interrupts when used with vfio-pci unless DisINTx masking support is disabled.
Thus, quirk the device to mark INTx masking as broken.
Closes: https://lore.kernel.org/all/VI1PR10MB8207C507DB5420AB4C7281E0DB9A2@VI1PR10MB... Link: https://lore.kernel.org/linux-pci/20240912215331.839220-1-alex.williamson@re... Reported-by: zdravko delineshev delineshev@outlook.com Signed-off-by: Alex Williamson alex.williamson@redhat.com [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński kwilczynski@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8502f8a27f239..9e4f8d06e2778 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3608,6 +3608,8 @@ DECLARE_PCI_FIXUP_FINAL(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */ quirk_broken_intx_masking); DECLARE_PCI_FIXUP_FINAL(0x1b7c, 0x0004, /* Ceton InfiniTV4 */ quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K2, + quirk_broken_intx_masking);
/* * Realtek RTL8169 PCI Gigabit Ethernet Controller (rev 10)
From: Kaixin Wang kxwang23@m.fudan.edu.cn
[ Upstream commit 609366e7a06d035990df78f1562291c3bf0d4a12 ]
In the cdns_i3c_master_probe function, &master->hj_work is bound with cdns_i3c_master_hj. And cdns_i3c_master_interrupt can call cnds_i3c_master_demux_ibis function to start the work.
If we remove the module which will call cdns_i3c_master_remove to make cleanup, it will free master->base through i3c_master_unregister while the work mentioned above will be used. The sequence of operations that may lead to a UAF bug is as follows:
CPU0 CPU1
| cdns_i3c_master_hj cdns_i3c_master_remove | i3c_master_unregister(&master->base) | device_unregister(&master->dev) | device_release | //free master->base | | i3c_master_do_daa(&master->base) | //use master->base
Fix it by ensuring that the work is canceled before proceeding with the cleanup in cdns_i3c_master_remove.
Signed-off-by: Kaixin Wang kxwang23@m.fudan.edu.cn Link: https://lore.kernel.org/r/20240911153544.848398-1-kxwang23@m.fudan.edu.cn Signed-off-by: Alexandre Belloni alexandre.belloni@bootlin.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i3c/master/i3c-master-cdns.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index c1627f3552ce3..c2d26beb3da2b 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -1666,6 +1666,7 @@ static void cdns_i3c_master_remove(struct platform_device *pdev) { struct cdns_i3c_master *master = platform_get_drvdata(pdev);
+ cancel_work_sync(&master->hj_work); i3c_master_unregister(&master->base);
clk_disable_unprepare(master->sysclk);
From: Palmer Dabbelt palmer@rivosinc.com
[ Upstream commit ad380f6a0a5e82e794b45bb2eaec24ed51a56846 ]
I recently ended up with a warning on some compilers along the lines of
CC kernel/resource.o In file included from include/linux/ioport.h:16, from kernel/resource.c:15: kernel/resource.c: In function 'gfr_start': include/linux/minmax.h:49:37: error: conversion from 'long long unsigned int' to 'resource_size_t' {aka 'unsigned int'} changes value from '17179869183' to '4294967295' [-Werror=overflow] 49 | ({ type ux = (x); type uy = (y); __cmp(op, ux, uy); }) | ^ include/linux/minmax.h:52:9: note: in expansion of macro '__cmp_once_unique' 52 | __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) | ^~~~~~~~~~~~~~~~~ include/linux/minmax.h:161:27: note: in expansion of macro '__cmp_once' 161 | #define min_t(type, x, y) __cmp_once(min, type, x, y) | ^~~~~~~~~~ kernel/resource.c:1829:23: note: in expansion of macro 'min_t' 1829 | end = min_t(resource_size_t, base->end, | ^~~~~ kernel/resource.c: In function 'gfr_continue': include/linux/minmax.h:49:37: error: conversion from 'long long unsigned int' to 'resource_size_t' {aka 'unsigned int'} changes value from '17179869183' to '4294967295' [-Werror=overflow] 49 | ({ type ux = (x); type uy = (y); __cmp(op, ux, uy); }) | ^ include/linux/minmax.h:52:9: note: in expansion of macro '__cmp_once_unique' 52 | __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) | ^~~~~~~~~~~~~~~~~ include/linux/minmax.h:161:27: note: in expansion of macro '__cmp_once' 161 | #define min_t(type, x, y) __cmp_once(min, type, x, y) | ^~~~~~~~~~ kernel/resource.c:1847:24: note: in expansion of macro 'min_t' 1847 | addr <= min_t(resource_size_t, base->end, | ^~~~~ cc1: all warnings being treated as errors
which looks like a real problem: our phys_addr_t is only 32 bits now, so having 34-bit masks is just going to result in overflows.
Reviewed-by: Charlie Jenkins charlie@rivosinc.com Reviewed-by: Alexandre Ghiti alexghiti@rivosinc.com Link: https://lore.kernel.org/r/20240731162159.9235-2-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt palmer@rivosinc.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/riscv/include/asm/sparsemem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h index 63acaecc33747..2f901a410586d 100644 --- a/arch/riscv/include/asm/sparsemem.h +++ b/arch/riscv/include/asm/sparsemem.h @@ -7,7 +7,7 @@ #ifdef CONFIG_64BIT #define MAX_PHYSMEM_BITS 56 #else -#define MAX_PHYSMEM_BITS 34 +#define MAX_PHYSMEM_BITS 32 #endif /* CONFIG_64BIT */ #define SECTION_SIZE_BITS 27 #endif /* CONFIG_SPARSEMEM */
From: Jens Axboe axboe@kernel.dk
[ Upstream commit eac2ca2d682f94f46b1973bdf5e77d85d77b8e53 ]
In terms of normal application usage, this list will always be empty. And if an application does overflow a bit, it'll have a few entries. However, nothing obviously prevents syzbot from running a test case that generates a ton of overflow entries, and then flushing them can take quite a while.
Check for needing to reschedule while flushing, and drop our locks and do so if necessary. There's no state to maintain here as overflows always prune from head-of-list, hence it's fine to drop and reacquire the locks at the end of the loop.
Link: https://lore.kernel.org/io-uring/66ed061d.050a0220.29194.0053.GAE@google.com... Reported-by: syzbot+5fca234bd7eb378ff78e@syzkaller.appspotmail.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- io_uring/io_uring.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 3942db160f18e..637e59503ef10 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -624,6 +624,21 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool dying) } list_del(&ocqe->list); kfree(ocqe); + + /* + * For silly syzbot cases that deliberately overflow by huge + * amounts, check if we need to resched and drop and + * reacquire the locks if so. Nothing real would ever hit this. + * Ideally we'd have a non-posting unlock for this, but hard + * to care for a non-real case. + */ + if (need_resched()) { + io_cq_unlock_post(ctx); + mutex_unlock(&ctx->uring_lock); + cond_resched(); + mutex_lock(&ctx->uring_lock); + io_cq_lock(ctx); + } }
if (list_empty(&ctx->cq_overflow_list)) {
From: Kaixin Wang kxwang23@m.fudan.edu.cn
[ Upstream commit e51aded92d42784313ba16c12f4f88cc4f973bbb ]
In the switchtec_ntb_add function, it can call switchtec_ntb_init_sndev function, then &sndev->check_link_status_work is bound with check_link_status_work. switchtec_ntb_link_notification may be called to start the work.
If we remove the module which will call switchtec_ntb_remove to make cleanup, it will free sndev through kfree(sndev), while the work mentioned above will be used. The sequence of operations that may lead to a UAF bug is as follows:
CPU0 CPU1
| check_link_status_work switchtec_ntb_remove | kfree(sndev); | | if (sndev->link_force_down) | // use sndev
Fix it by ensuring that the work is canceled before proceeding with the cleanup in switchtec_ntb_remove.
Signed-off-by: Kaixin Wang kxwang23@m.fudan.edu.cn Reviewed-by: Logan Gunthorpe logang@deltatee.com Signed-off-by: Jon Mason jdmason@kudzu.us Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 31946387badf0..ad1786be2554b 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -1554,6 +1554,7 @@ static void switchtec_ntb_remove(struct device *dev) switchtec_ntb_deinit_db_msg_irq(sndev); switchtec_ntb_deinit_shared_mw(sndev); switchtec_ntb_deinit_crosslink(sndev); + cancel_work_sync(&sndev->check_link_status_work); kfree(sndev); dev_info(dev, "ntb device unregistered\n"); }
From: Hans de Goede hdegoede@redhat.com
[ Upstream commit ae7eee56cdcfcb6a886f76232778d6517fd58690 ]
There are 2G and 4G RAM versions of the Lenovo Yoga Tab 3 X90F and it turns out that the 2G version has a DMI product name of "CHERRYVIEW D1 PLATFORM" where as the 4G version has "CHERRYVIEW C0 PLATFORM". The sys-vendor + product-version check are unique enough that the product-name check is not necessary.
Drop the product-name check so that the existing DMI match for the 4G RAM version also matches the 2G RAM version.
Signed-off-by: Hans de Goede hdegoede@redhat.com Reviewed-by: Andy Shevchenko andy@kernel.org Link: https://lore.kernel.org/r/20240825132617.8809-1-hdegoede@redhat.com Signed-off-by: Lee Jones lee@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/mfd/intel_soc_pmic_chtwc.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/mfd/intel_soc_pmic_chtwc.c b/drivers/mfd/intel_soc_pmic_chtwc.c index 7fce3ef7ab453..2a83f540d4c9d 100644 --- a/drivers/mfd/intel_soc_pmic_chtwc.c +++ b/drivers/mfd/intel_soc_pmic_chtwc.c @@ -178,7 +178,6 @@ static const struct dmi_system_id cht_wc_model_dmi_ids[] = { .driver_data = (void *)(long)INTEL_CHT_WC_LENOVO_YT3_X90, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, },
From: Ilpo Järvinen ilpo.jarvinen@linux.intel.com
[ Upstream commit 6112597f5ba84b70870fade5069ccc9c8b534a33 ]
Add Intel Arrow Lake-H PCI IDs.
Signed-off-by: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Reviewed-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Link: https://lore.kernel.org/r/20240829095719.1557-2-ilpo.jarvinen@linux.intel.co... Signed-off-by: Lee Jones lee@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/mfd/intel-lpss-pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 1362b3f64ade6..0eb6a98ed0fc0 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -424,6 +424,19 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x5ac4), (kernel_ulong_t)&bxt_spi_info }, { PCI_VDEVICE(INTEL, 0x5ac6), (kernel_ulong_t)&bxt_spi_info }, { PCI_VDEVICE(INTEL, 0x5aee), (kernel_ulong_t)&bxt_uart_info }, + /* ARL-H */ + { PCI_VDEVICE(INTEL, 0x7725), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x7726), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x7727), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0x7730), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0x7746), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0x7750), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x7751), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x7752), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x7778), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x7779), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x777a), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x777b), (kernel_ulong_t)&bxt_i2c_info }, /* RPL-S */ { PCI_VDEVICE(INTEL, 0x7a28), (kernel_ulong_t)&bxt_uart_info }, { PCI_VDEVICE(INTEL, 0x7a29), (kernel_ulong_t)&bxt_uart_info },
From: Ilpo Järvinen ilpo.jarvinen@linux.intel.com
[ Upstream commit db6a186505c8156e55681a3b93ada431863b85c1 ]
Add Intel Panther Lake-H/P PCI IDs.
Signed-off-by: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Reviewed-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Link: https://lore.kernel.org/r/20240829095719.1557-3-ilpo.jarvinen@linux.intel.co... Signed-off-by: Lee Jones lee@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/mfd/intel-lpss-pci.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+)
diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 0eb6a98ed0fc0..1d8cdc4d5819b 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -607,6 +607,32 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa879), (kernel_ulong_t)&ehl_i2c_info }, { PCI_VDEVICE(INTEL, 0xa87a), (kernel_ulong_t)&ehl_i2c_info }, { PCI_VDEVICE(INTEL, 0xa87b), (kernel_ulong_t)&ehl_i2c_info }, + /* PTL-H */ + { PCI_VDEVICE(INTEL, 0xe325), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xe326), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xe327), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0xe330), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0xe346), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0xe350), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe351), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe352), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xe378), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe379), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe37a), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe37b), (kernel_ulong_t)&ehl_i2c_info }, + /* PTL-P */ + { PCI_VDEVICE(INTEL, 0xe425), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xe426), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xe427), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0xe430), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0xe446), (kernel_ulong_t)&tgl_spi_info }, + { PCI_VDEVICE(INTEL, 0xe450), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe451), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe452), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xe478), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe479), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe47a), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xe47b), (kernel_ulong_t)&ehl_i2c_info }, { } }; MODULE_DEVICE_TABLE(pci, intel_lpss_pci_ids);
From: Samuel Holland samuel.holland@sifive.com
[ Upstream commit 58ff537109ac863d4ec83baf8413b17dcc10101c ]
The optimized string routines are implemented in assembly, so they are not instrumented for use with KASAN. Fall back to the C version of the routines in order to improve KASAN coverage. This fixes the kasan_strings() unit test.
Signed-off-by: Samuel Holland samuel.holland@sifive.com Reviewed-by: Alexandre Ghiti alexghiti@rivosinc.com Tested-by: Alexandre Ghiti alexghiti@rivosinc.com Link: https://lore.kernel.org/r/20240801033725.28816-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt palmer@rivosinc.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/riscv/include/asm/string.h | 2 ++ arch/riscv/kernel/riscv_ksyms.c | 3 --- arch/riscv/lib/Makefile | 2 ++ arch/riscv/lib/strcmp.S | 1 + arch/riscv/lib/strlen.S | 1 + arch/riscv/lib/strncmp.S | 1 + arch/riscv/purgatory/Makefile | 2 ++ 7 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index a96b1fea24fe4..5ba77f60bf0b5 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -19,6 +19,7 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); extern asmlinkage void *memmove(void *, const void *, size_t); extern asmlinkage void *__memmove(void *, const void *, size_t);
+#if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) #define __HAVE_ARCH_STRCMP extern asmlinkage int strcmp(const char *cs, const char *ct);
@@ -27,6 +28,7 @@ extern asmlinkage __kernel_size_t strlen(const char *);
#define __HAVE_ARCH_STRNCMP extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); +#endif
/* For those files which don't want to check by kasan. */ #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index a72879b4249a5..5ab1c7e1a6ed5 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,9 +12,6 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 2b369f51b0a5e..8eec6b69a875f 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,9 +3,11 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o lib-y += memmove.o +ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),) lib-y += strcmp.o lib-y += strlen.o lib-y += strncmp.o +endif lib-y += csum.o ifeq ($(CONFIG_MMU), y) lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S index 687b2bea5c438..542301a67a2ff 100644 --- a/arch/riscv/lib/strcmp.S +++ b/arch/riscv/lib/strcmp.S @@ -120,3 +120,4 @@ strcmp_zbb: .option pop #endif SYM_FUNC_END(strcmp) +EXPORT_SYMBOL(strcmp) diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S index 8ae3064e45ff0..962983b73251e 100644 --- a/arch/riscv/lib/strlen.S +++ b/arch/riscv/lib/strlen.S @@ -131,3 +131,4 @@ strlen_zbb: #endif SYM_FUNC_END(strlen) SYM_FUNC_ALIAS(__pi_strlen, strlen) +EXPORT_SYMBOL(strlen) diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S index aba5b3148621d..0f359ea2f55b2 100644 --- a/arch/riscv/lib/strncmp.S +++ b/arch/riscv/lib/strncmp.S @@ -136,3 +136,4 @@ strncmp_zbb: .option pop #endif SYM_FUNC_END(strncmp) +EXPORT_SYMBOL(strncmp) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index f11945ee24903..fb9c917c9b457 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0
purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o +ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),) purgatory-y += strcmp.o strlen.o strncmp.o +endif
targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
From: Jisheng Zhang jszhang@kernel.org
[ Upstream commit 8f1534e7440382d118c3d655d3a6014128b2086d ]
Inspired by[1], modify the code to remove the code of modifying ra to avoid imbalance RAS (return address stack) which may lead to incorret predictions on return.
Link: https://lore.kernel.org/linux-riscv/20240607061335.2197383-1-cyrilbur@tensto... [1] Signed-off-by: Jisheng Zhang jszhang@kernel.org Reviewed-by: Cyril Bur cyrilbur@tenstorrent.com Link: https://lore.kernel.org/r/20240720170659.1522-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt palmer@rivosinc.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/riscv/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index ac2e908d4418d..fefb8e7d957a0 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -239,8 +239,8 @@ SYM_CODE_START(ret_from_fork) jalr s0 1: move a0, sp /* pt_regs */ - la ra, ret_from_exception - tail syscall_exit_to_user_mode + call syscall_exit_to_user_mode + j ret_from_exception SYM_CODE_END(ret_from_fork)
#ifdef CONFIG_IRQ_STACKS
From: Michael Guralnik michaelgur@nvidia.com
[ Upstream commit 8c6d097d830f779fc1725fbaa1314f20a7a07b4b ]
The new memory scheme page faults are requesting the driver to fetch additinal pages to the faulted memory access. This is done in order to prefetch pages before and after the area that got the page fault, assuming this will reduce the total amount of page faults.
The driver should ensure it handles only the pages that are within the umem range.
Signed-off-by: Michael Guralnik michaelgur@nvidia.com Link: https://patch.msgid.link/20240909100504.29797-5-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/mlx5/odp.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index a524181f34df9..3a4605fda6d57 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -733,24 +733,31 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, * >0: Number of pages mapped */ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, - u32 *bytes_mapped, u32 flags) + u32 *bytes_mapped, u32 flags, bool permissive_fault) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- if (unlikely(io_virt < mr->ibmr.iova)) + if (unlikely(io_virt < mr->ibmr.iova) && !permissive_fault) return -EFAULT;
if (mr->umem->is_dmabuf) return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags);
if (!odp->is_implicit_odp) { + u64 offset = io_virt < mr->ibmr.iova ? 0 : io_virt - mr->ibmr.iova; u64 user_va;
- if (check_add_overflow(io_virt - mr->ibmr.iova, - (u64)odp->umem.address, &user_va)) + if (check_add_overflow(offset, (u64)odp->umem.address, + &user_va)) return -EFAULT; - if (unlikely(user_va >= ib_umem_end(odp) || - ib_umem_end(odp) - user_va < bcnt)) + + if (permissive_fault) { + if (user_va < ib_umem_start(odp)) + user_va = ib_umem_start(odp); + if ((user_va + bcnt) > ib_umem_end(odp)) + bcnt = ib_umem_end(odp) - user_va; + } else if (unlikely(user_va >= ib_umem_end(odp) || + ib_umem_end(odp) - user_va < bcnt)) return -EFAULT; return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped, flags); @@ -857,7 +864,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); + ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false); if (ret < 0) goto end;
@@ -1710,7 +1717,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) for (i = 0; i < work->num_sge; ++i) { ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, work->frags[i].length, &bytes_mapped, - work->pf_flags); + work->pf_flags, false); if (ret <= 0) continue; mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); @@ -1761,7 +1768,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, if (IS_ERR(mr)) return PTR_ERR(mr); ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, - &bytes_mapped, pf_flags); + &bytes_mapped, pf_flags, false); if (ret < 0) { mlx5r_deref_odp_mkey(&mr->mmkey); return ret;
From: Prudhvi Yarlagadda quic_pyarlaga@quicinc.com
[ Upstream commit 10ba0854c5e6165b58e17bda5fb671e729fecf9e ]
PARF hardware block which is a wrapper on top of DWC PCIe controller mirrors the DBI and ATU register space. It uses PARF_SLV_ADDR_SPACE_SIZE register to get the size of the memory block to be mirrored and uses PARF_DBI_BASE_ADDR, PARF_ATU_BASE_ADDR registers to determine the base address of DBI and ATU space inside the memory block that is being mirrored.
When a memory region which is located above the SLV_ADDR_SPACE_SIZE boundary is used for BAR region then there could be an overlap of DBI and ATU address space that is getting mirrored and the BAR region. This results in DBI and ATU address space contents getting updated when a PCIe function driver tries updating the BAR/MMIO memory region. Reference memory map of the PCIe memory region with DBI and ATU address space overlapping BAR region is as below.
|---------------| | | | | ------- --------|---------------| | | |---------------| | | | DBI | | | |---------------|---->DBI_BASE_ADDR | | | | | | | | | PCIe | |---->2*SLV_ADDR_SPACE_SIZE | BAR/MMIO|---------------| | Region | ATU | | | |---------------|---->ATU_BASE_ADDR | | | | PCIe | |---------------| Memory | | DBI | Region | |---------------|---->DBI_BASE_ADDR | | | | | --------| | | | |---->SLV_ADDR_SPACE_SIZE | |---------------| | | ATU | | |---------------|---->ATU_BASE_ADDR | | | | |---------------| | | DBI | | |---------------|---->DBI_BASE_ADDR | | | | | | ----------------|---------------| | | | | | | |---------------|
Currently memory region beyond the SLV_ADDR_SPACE_SIZE boundary is not used for BAR region which is why the above mentioned issue is not encountered. This issue is discovered as part of internal testing when we tried moving the BAR region beyond the SLV_ADDR_SPACE_SIZE boundary. Hence we are trying to fix this.
As PARF hardware block mirrors DBI and ATU register space after every PARF_SLV_ADDR_SPACE_SIZE (default 0x1000000) boundary multiple, program maximum possible size to this register by writing 0x80000000 to it(it considers only powers of 2 as values) to avoid mirroring DBI and ATU to BAR/MMIO region. Write the physical base address of DBI and ATU register blocks to PARF_DBI_BASE_ADDR (default 0x0) and PARF_ATU_BASE_ADDR (default 0x1000) respectively to make sure DBI and ATU blocks are at expected memory locations.
The register offsets PARF_DBI_BASE_ADDR_V2, PARF_SLV_ADDR_SPACE_SIZE_V2 and PARF_ATU_BASE_ADDR are applicable for platforms that use Qcom IP rev 1.9.0, 2.7.0 and 2.9.0. PARF_DBI_BASE_ADDR_V2 and PARF_SLV_ADDR_SPACE_SIZE_V2 are applicable for Qcom IP rev 2.3.3. PARF_DBI_BASE_ADDR and PARF_SLV_ADDR_SPACE_SIZE are applicable for Qcom IP rev 1.0.0, 2.3.2 and 2.4.0. Update init()/post_init() functions of the respective Qcom IP versions to program applicable PARF_DBI_BASE_ADDR, PARF_SLV_ADDR_SPACE_SIZE and PARF_ATU_BASE_ADDR register offsets. Update the SLV_ADDR_SPACE_SZ macro to 0x80000000 to set highest bit in PARF_SLV_ADDR_SPACE_SIZE register.
Cache DBI and iATU physical addresses in 'struct dw_pcie' so that pcie_qcom.c driver can program these addresses in the PARF_DBI_BASE_ADDR and PARF_ATU_BASE_ADDR registers.
Suggested-by: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org Link: https://lore.kernel.org/linux-pci/20240814220338.1969668-1-quic_pyarlaga@qui... Signed-off-by: Prudhvi Yarlagadda quic_pyarlaga@quicinc.com Signed-off-by: Krzysztof Wilczyński kwilczynski@kernel.org Reviewed-by: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org Reviewed-by: Mayank Rana quic_mrana@quicinc.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/controller/dwc/pcie-designware.c | 2 + drivers/pci/controller/dwc/pcie-designware.h | 2 + drivers/pci/controller/dwc/pcie-qcom.c | 72 ++++++++++++++++---- 3 files changed, 61 insertions(+), 15 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 1b5aba1f0c92f..bc3a5d6b01779 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -112,6 +112,7 @@ int dw_pcie_get_resources(struct dw_pcie *pci) pci->dbi_base = devm_pci_remap_cfg_resource(pci->dev, res); if (IS_ERR(pci->dbi_base)) return PTR_ERR(pci->dbi_base); + pci->dbi_phys_addr = res->start; }
/* DBI2 is mainly useful for the endpoint controller */ @@ -134,6 +135,7 @@ int dw_pcie_get_resources(struct dw_pcie *pci) pci->atu_base = devm_ioremap_resource(pci->dev, res); if (IS_ERR(pci->atu_base)) return PTR_ERR(pci->atu_base); + pci->atu_phys_addr = res->start; } else { pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET; } diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index 53c4c8f399c88..e518f81ea80cd 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -407,8 +407,10 @@ struct dw_pcie_ops { struct dw_pcie { struct device *dev; void __iomem *dbi_base; + resource_size_t dbi_phys_addr; void __iomem *dbi_base2; void __iomem *atu_base; + resource_size_t atu_phys_addr; size_t atu_size; u32 num_ib_windows; u32 num_ob_windows; diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 6f953e32d9907..0b3020c7a50a4 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -45,6 +45,7 @@ #define PARF_PHY_REFCLK 0x4c #define PARF_CONFIG_BITS 0x50 #define PARF_DBI_BASE_ADDR 0x168 +#define PARF_SLV_ADDR_SPACE_SIZE 0x16c #define PARF_MHI_CLOCK_RESET_CTRL 0x174 #define PARF_AXI_MSTR_WR_ADDR_HALT 0x178 #define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8 @@ -52,8 +53,13 @@ #define PARF_LTSSM 0x1b0 #define PARF_SID_OFFSET 0x234 #define PARF_BDF_TRANSLATE_CFG 0x24c -#define PARF_SLV_ADDR_SPACE_SIZE 0x358 +#define PARF_DBI_BASE_ADDR_V2 0x350 +#define PARF_DBI_BASE_ADDR_V2_HI 0x354 +#define PARF_SLV_ADDR_SPACE_SIZE_V2 0x358 +#define PARF_SLV_ADDR_SPACE_SIZE_V2_HI 0x35c #define PARF_NO_SNOOP_OVERIDE 0x3d4 +#define PARF_ATU_BASE_ADDR 0x634 +#define PARF_ATU_BASE_ADDR_HI 0x638 #define PARF_DEVICE_TYPE 0x1000 #define PARF_BDF_TO_SID_TABLE_N 0x2000 #define PARF_BDF_TO_SID_CFG 0x2c00 @@ -108,7 +114,7 @@ #define PHY_RX0_EQ(x) FIELD_PREP(GENMASK(26, 24), x)
/* PARF_SLV_ADDR_SPACE_SIZE register value */ -#define SLV_ADDR_SPACE_SZ 0x10000000 +#define SLV_ADDR_SPACE_SZ 0x80000000
/* PARF_MHI_CLOCK_RESET_CTRL register fields */ #define AHB_CLK_EN BIT(0) @@ -325,6 +331,50 @@ static void qcom_pcie_clear_hpc(struct dw_pcie *pci) dw_pcie_dbi_ro_wr_dis(pci); }
+static void qcom_pcie_configure_dbi_base(struct qcom_pcie *pcie) +{ + struct dw_pcie *pci = pcie->pci; + + if (pci->dbi_phys_addr) { + /* + * PARF_DBI_BASE_ADDR register is in CPU domain and require to + * be programmed with CPU physical address. + */ + writel(lower_32_bits(pci->dbi_phys_addr), pcie->parf + + PARF_DBI_BASE_ADDR); + writel(SLV_ADDR_SPACE_SZ, pcie->parf + + PARF_SLV_ADDR_SPACE_SIZE); + } +} + +static void qcom_pcie_configure_dbi_atu_base(struct qcom_pcie *pcie) +{ + struct dw_pcie *pci = pcie->pci; + + if (pci->dbi_phys_addr) { + /* + * PARF_DBI_BASE_ADDR_V2 and PARF_ATU_BASE_ADDR registers are + * in CPU domain and require to be programmed with CPU + * physical addresses. + */ + writel(lower_32_bits(pci->dbi_phys_addr), pcie->parf + + PARF_DBI_BASE_ADDR_V2); + writel(upper_32_bits(pci->dbi_phys_addr), pcie->parf + + PARF_DBI_BASE_ADDR_V2_HI); + + if (pci->atu_phys_addr) { + writel(lower_32_bits(pci->atu_phys_addr), pcie->parf + + PARF_ATU_BASE_ADDR); + writel(upper_32_bits(pci->atu_phys_addr), pcie->parf + + PARF_ATU_BASE_ADDR_HI); + } + + writel(0x0, pcie->parf + PARF_SLV_ADDR_SPACE_SIZE_V2); + writel(SLV_ADDR_SPACE_SZ, pcie->parf + + PARF_SLV_ADDR_SPACE_SIZE_V2_HI); + } +} + static void qcom_pcie_2_1_0_ltssm_enable(struct qcom_pcie *pcie) { u32 val; @@ -541,8 +591,7 @@ static int qcom_pcie_init_1_0_0(struct qcom_pcie *pcie)
static int qcom_pcie_post_init_1_0_0(struct qcom_pcie *pcie) { - /* change DBI base address */ - writel(0, pcie->parf + PARF_DBI_BASE_ADDR); + qcom_pcie_configure_dbi_base(pcie);
if (IS_ENABLED(CONFIG_PCI_MSI)) { u32 val = readl(pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT); @@ -629,8 +678,7 @@ static int qcom_pcie_post_init_2_3_2(struct qcom_pcie *pcie) val &= ~PHY_TEST_PWR_DOWN; writel(val, pcie->parf + PARF_PHY_CTRL);
- /* change DBI base address */ - writel(0, pcie->parf + PARF_DBI_BASE_ADDR); + qcom_pcie_configure_dbi_base(pcie);
/* MAC PHY_POWERDOWN MUX DISABLE */ val = readl(pcie->parf + PARF_SYS_CTRL); @@ -812,13 +860,11 @@ static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie) u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); u32 val;
- writel(SLV_ADDR_SPACE_SZ, pcie->parf + PARF_SLV_ADDR_SPACE_SIZE); - val = readl(pcie->parf + PARF_PHY_CTRL); val &= ~PHY_TEST_PWR_DOWN; writel(val, pcie->parf + PARF_PHY_CTRL);
- writel(0, pcie->parf + PARF_DBI_BASE_ADDR); + qcom_pcie_configure_dbi_atu_base(pcie);
writel(MST_WAKEUP_EN | SLV_WAKEUP_EN | MSTR_ACLK_CGC_DIS | SLV_ACLK_CGC_DIS | CORE_CLK_CGC_DIS | @@ -914,8 +960,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie) val &= ~PHY_TEST_PWR_DOWN; writel(val, pcie->parf + PARF_PHY_CTRL);
- /* change DBI base address */ - writel(0, pcie->parf + PARF_DBI_BASE_ADDR); + qcom_pcie_configure_dbi_atu_base(pcie);
/* MAC PHY_POWERDOWN MUX DISABLE */ val = readl(pcie->parf + PARF_SYS_CTRL); @@ -1124,14 +1169,11 @@ static int qcom_pcie_post_init_2_9_0(struct qcom_pcie *pcie) u32 val; int i;
- writel(SLV_ADDR_SPACE_SZ, - pcie->parf + PARF_SLV_ADDR_SPACE_SIZE); - val = readl(pcie->parf + PARF_PHY_CTRL); val &= ~PHY_TEST_PWR_DOWN; writel(val, pcie->parf + PARF_PHY_CTRL);
- writel(0, pcie->parf + PARF_DBI_BASE_ADDR); + qcom_pcie_configure_dbi_atu_base(pcie);
writel(DEVICE_TYPE_RC, pcie->parf + PARF_DEVICE_TYPE); writel(BYPASS | MSTR_AXI_CLK_EN | AHB_CLK_EN,
From: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org
[ Upstream commit 0328947c50324cf4b2d8b181bf948edb8101f59f ]
Right now, PCI endpoint subsystem doesn't assign PCI domain number for the PCI endpoint controllers. But this domain number could be useful to the EPC drivers to uniquely identify each controller based on the hardware instance when there are multiple ones present in an SoC (even multiple RC/EP).
So let's make use of the existing pci_bus_find_domain_nr() API to allocate domain numbers based on either devicetree (linux,pci-domain) property or dynamic domain number allocation scheme.
It should be noted that the domain number allocated by this API will be based on both RC and EP controllers in a SoC. If the 'linux,pci-domain' DT property is present, then the domain number represents the actual hardware instance of the PCI endpoint controller. If not, then the domain number will be allocated based on the PCI EP/RC controller probe order.
If the architecture doesn't support CONFIG_PCI_DOMAINS_GENERIC (rare), then currently a warning is thrown to indicate that the architecture specific implementation is needed.
Link: https://lore.kernel.org/linux-pci/20240828-pci-qcom-hotplug-v4-5-263a385fbbc... Signed-off-by: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org Signed-off-by: Krzysztof Wilczyński kwilczynski@kernel.org Reviewed-by: Frank Li Frank.Li@nxp.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/endpoint/pci-epc-core.c | 14 ++++++++++++++ include/linux/pci-epc.h | 2 ++ 2 files changed, 16 insertions(+)
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 84309dfe0c684..085a2de8b923d 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -838,6 +838,10 @@ void pci_epc_destroy(struct pci_epc *epc) { pci_ep_cfs_remove_epc_group(epc->group); device_unregister(&epc->dev); + +#ifdef CONFIG_PCI_DOMAINS_GENERIC + pci_bus_release_domain_nr(NULL, &epc->dev); +#endif } EXPORT_SYMBOL_GPL(pci_epc_destroy);
@@ -900,6 +904,16 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, epc->dev.release = pci_epc_release; epc->ops = ops;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC + epc->domain_nr = pci_bus_find_domain_nr(NULL, dev); +#else + /* + * TODO: If the architecture doesn't support generic PCI + * domains, then a custom implementation has to be used. + */ + WARN_ONCE(1, "This architecture doesn't support generic PCI domains\n"); +#endif + ret = dev_set_name(&epc->dev, "%s", dev_name(dev)); if (ret) goto put_dev; diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 85bdf2adb7607..8e3dcac55dcd5 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -128,6 +128,7 @@ struct pci_epc_mem { * @group: configfs group representing the PCI EPC device * @lock: mutex to protect pci_epc ops * @function_num_map: bitmap to manage physical function number + * @domain_nr: PCI domain number of the endpoint controller * @init_complete: flag to indicate whether the EPC initialization is complete * or not */ @@ -145,6 +146,7 @@ struct pci_epc { /* mutex to protect against concurrent access of EP controller */ struct mutex lock; unsigned long function_num_map; + int domain_nr; bool init_complete; };
From: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com
[ Upstream commit f8c35d61ba01afa76846905c67862cdace7f66b0 ]
The SoundWire peripheral enumeration is entirely based on interrupts, more specifically sticky bits tracking state changes.
This patch adds a defensive programming check on the actual status reported in PING frames. If for some reason an interrupt was lost or delayed, the delayed work would detect a peripheral change of status after the bus starts.
The 100ms defined for the delay is not completely arbitrary, if a Peripheral didn't join the bus within that delay then probably the hardware link is broken, and conversely if the detection didn't happen because of software issues the 100ms is still acceptable in terms of user experience.
The overhead of the one-shot workqueue is minimal, and the mutual exclusion ensures that the interrupt and delayed work cannot update the status concurrently.
Reviewed-by: Liam Girdwood liam.r.girdwood@intel.com Signed-off-by: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com Signed-off-by: Bard Liao yung-chuan.liao@linux.intel.com Link: https://lore.kernel.org/r/20240805114921.88007-1-yung-chuan.liao@linux.intel... Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/soundwire/cadence_master.c | 39 ++++++++++++++++++++++++++-- drivers/soundwire/cadence_master.h | 5 ++++ drivers/soundwire/intel.h | 2 ++ drivers/soundwire/intel_auxdevice.c | 1 + drivers/soundwire/intel_bus_common.c | 11 ++++++++ 5 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index e0683a5975d10..05652e983539b 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -890,8 +890,14 @@ static int cdns_update_slave_status(struct sdw_cdns *cdns, } }
- if (is_slave) - return sdw_handle_slave_status(&cdns->bus, status); + if (is_slave) { + int ret; + + mutex_lock(&cdns->status_update_lock); + ret = sdw_handle_slave_status(&cdns->bus, status); + mutex_unlock(&cdns->status_update_lock); + return ret; + }
return 0; } @@ -988,6 +994,31 @@ irqreturn_t sdw_cdns_irq(int irq, void *dev_id) } EXPORT_SYMBOL(sdw_cdns_irq);
+static void cdns_check_attached_status_dwork(struct work_struct *work) +{ + struct sdw_cdns *cdns = + container_of(work, struct sdw_cdns, attach_dwork.work); + enum sdw_slave_status status[SDW_MAX_DEVICES + 1]; + u32 val; + int ret; + int i; + + val = cdns_readl(cdns, CDNS_MCP_SLAVE_STAT); + + for (i = 0; i <= SDW_MAX_DEVICES; i++) { + status[i] = val & 0x3; + if (status[i]) + dev_dbg(cdns->dev, "Peripheral %d status: %d\n", i, status[i]); + val >>= 2; + } + + mutex_lock(&cdns->status_update_lock); + ret = sdw_handle_slave_status(&cdns->bus, status); + mutex_unlock(&cdns->status_update_lock); + if (ret < 0) + dev_err(cdns->dev, "%s: sdw_handle_slave_status failed: %d\n", __func__, ret); +} + /** * cdns_update_slave_status_work - update slave status in a work since we will need to handle * other interrupts eg. CDNS_MCP_INT_RX_WL during the update slave @@ -1740,7 +1771,11 @@ int sdw_cdns_probe(struct sdw_cdns *cdns) init_completion(&cdns->tx_complete); cdns->bus.port_ops = &cdns_port_ops;
+ mutex_init(&cdns->status_update_lock); + INIT_WORK(&cdns->work, cdns_update_slave_status_work); + INIT_DELAYED_WORK(&cdns->attach_dwork, cdns_check_attached_status_dwork); + return 0; } EXPORT_SYMBOL(sdw_cdns_probe); diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h index bc84435e420f5..e1d7969ba48ae 100644 --- a/drivers/soundwire/cadence_master.h +++ b/drivers/soundwire/cadence_master.h @@ -117,6 +117,8 @@ struct sdw_cdns_dai_runtime { * @link_up: Link status * @msg_count: Messages sent on bus * @dai_runtime_array: runtime context for each allocated DAI. + * @status_update_lock: protect concurrency between interrupt-based and delayed work + * status update */ struct sdw_cdns { struct device *dev; @@ -148,10 +150,13 @@ struct sdw_cdns { bool interrupt_enabled;
struct work_struct work; + struct delayed_work attach_dwork;
struct list_head list;
struct sdw_cdns_dai_runtime **dai_runtime_array; + + struct mutex status_update_lock; /* add mutual exclusion to sdw_handle_slave_status() */ };
#define bus_to_cdns(_bus) container_of(_bus, struct sdw_cdns, bus) diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 68838e843b543..f4235f5991c37 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -103,6 +103,8 @@ static inline void intel_writew(void __iomem *base, int offset, u16 value)
#define INTEL_MASTER_RESET_ITERATIONS 10
+#define SDW_INTEL_DELAYED_ENUMERATION_MS 100 + #define SDW_INTEL_CHECK_OPS(sdw, cb) ((sdw) && (sdw)->link_res && (sdw)->link_res->hw_ops && \ (sdw)->link_res->hw_ops->cb) #define SDW_INTEL_OPS(sdw, cb) ((sdw)->link_res->hw_ops->cb) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 8807e01cbf7c7..dff49c5ce5b30 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -475,6 +475,7 @@ static void intel_link_remove(struct auxiliary_device *auxdev) */ if (!bus->prop.hw_disabled) { sdw_intel_debugfs_exit(sdw); + cancel_delayed_work_sync(&cdns->attach_dwork); sdw_cdns_enable_interrupt(cdns, false); } sdw_bus_master_delete(bus); diff --git a/drivers/soundwire/intel_bus_common.c b/drivers/soundwire/intel_bus_common.c index a75e5d7d87154..d3ff6c65b64c3 100644 --- a/drivers/soundwire/intel_bus_common.c +++ b/drivers/soundwire/intel_bus_common.c @@ -60,6 +60,9 @@ int intel_start_bus(struct sdw_intel *sdw) sdw_cdns_check_self_clearing_bits(cdns, __func__, true, INTEL_MASTER_RESET_ITERATIONS);
+ schedule_delayed_work(&cdns->attach_dwork, + msecs_to_jiffies(SDW_INTEL_DELAYED_ENUMERATION_MS)); + return 0; }
@@ -151,6 +154,9 @@ int intel_start_bus_after_reset(struct sdw_intel *sdw) } sdw_cdns_check_self_clearing_bits(cdns, __func__, true, INTEL_MASTER_RESET_ITERATIONS);
+ schedule_delayed_work(&cdns->attach_dwork, + msecs_to_jiffies(SDW_INTEL_DELAYED_ENUMERATION_MS)); + return 0; }
@@ -184,6 +190,9 @@ int intel_start_bus_after_clock_stop(struct sdw_intel *sdw)
sdw_cdns_check_self_clearing_bits(cdns, __func__, true, INTEL_MASTER_RESET_ITERATIONS);
+ schedule_delayed_work(&cdns->attach_dwork, + msecs_to_jiffies(SDW_INTEL_DELAYED_ENUMERATION_MS)); + return 0; }
@@ -194,6 +203,8 @@ int intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) bool wake_enable = false; int ret;
+ cancel_delayed_work_sync(&cdns->attach_dwork); + if (clock_stop) { ret = sdw_cdns_clock_stop(cdns, true); if (ret < 0)
From: Ying Sun sunying@isrc.iscas.ac.cn
[ Upstream commit c6ebf2c528470a09be77d0d9df2c6617ea037ac5 ]
Runs on the kernel with CONFIG_RISCV_ALTERNATIVE enabled: kexec -sl vmlinux
Error: kexec_image: Unknown rela relocation: 34 kexec_image: Error loading purgatory ret=-8 and kexec_image: Unknown rela relocation: 38 kexec_image: Error loading purgatory ret=-8
The purgatory code uses the 16-bit addition and subtraction relocation type, but not handled, resulting in kexec_file_load failure. So add handle to arch_kexec_apply_relocations_add().
Tested on RISC-V64 Qemu-virt, issue fixed.
Co-developed-by: Petr Tesarik petr@tesarici.cz Signed-off-by: Petr Tesarik petr@tesarici.cz Signed-off-by: Ying Sun sunying@isrc.iscas.ac.cn Reviewed-by: Andrew Jones ajones@ventanamicro.com Link: https://lore.kernel.org/r/20240711083236.2859632-1-sunying@isrc.iscas.ac.cn Signed-off-by: Palmer Dabbelt palmer@rivosinc.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/riscv/kernel/elf_kexec.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 11c0d2e0becfe..3c37661801f95 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -451,6 +451,12 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | ENCODE_CJTYPE_IMM(val - addr); break; + case R_RISCV_ADD16: + *(u16 *)loc += val; + break; + case R_RISCV_SUB16: + *(u16 *)loc -= val; + break; case R_RISCV_ADD32: *(u32 *)loc += val; break;
From: Yunke Cao yunkec@chromium.org
[ Upstream commit 6a9c97ab6b7e85697e0b74e86062192a5ffffd99 ]
Clear vb2_plane's memory related fields in __vb2_plane_dmabuf_put(), including bytesused, length, fd and data_offset.
Remove the duplicated code in __prepare_dmabuf().
Signed-off-by: Yunke Cao yunkec@chromium.org Acked-by: Tomasz Figa tfiga@chromium.org Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/common/videobuf2/videobuf2-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 0217392fcc0d9..4d232b08f9505 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -311,6 +311,10 @@ static void __vb2_plane_dmabuf_put(struct vb2_buffer *vb, struct vb2_plane *p) p->mem_priv = NULL; p->dbuf = NULL; p->dbuf_mapped = 0; + p->bytesused = 0; + p->length = 0; + p->m.fd = 0; + p->data_offset = 0; }
/* @@ -1420,10 +1424,6 @@ static int __prepare_dmabuf(struct vb2_buffer *vb)
/* Release previously acquired memory if present */ __vb2_plane_dmabuf_put(vb, &vb->planes[plane]); - vb->planes[plane].bytesused = 0; - vb->planes[plane].length = 0; - vb->planes[plane].m.fd = 0; - vb->planes[plane].data_offset = 0;
/* Acquire each plane's memory */ mem_priv = call_ptr_memop(attach_dmabuf,
From: Peng Fan peng.fan@nxp.com
[ Upstream commit e954a1bd16102abc800629f9900715d8ec4c3130 ]
If there is a resource table device tree node, use the address as the resource table address, otherwise use the address(where .resource_table section loaded) inside the Cortex-M elf file.
And there is an update in NXP SDK that Resource Domain Control(RDC) enabled to protect TCM, linux not able to write the TCM space when updating resource table status and cause kernel dump. So use the address from device tree could avoid kernel dump.
Note: NXP M4 SDK not check resource table update, so it does not matter use whether resource table address specified in elf file or in device tree. But to reflect the fact that if people specific resource table address in device tree, it means people are aware and going to use it, not the address specified in elf file.
Reviewed-by: Iuliana Prodan iuliana.prodan@nxp.com Signed-off-by: Peng Fan peng.fan@nxp.com Reviewed-by: Daniel Baluta daniel.baluta@nxp.com Link: https://lore.kernel.org/r/20240719-imx_rproc-v2-2-10d0268c7eb1@nxp.com Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/remoteproc/imx_rproc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 144c8e9a642e8..f1424da9cb5a7 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -666,6 +666,17 @@ static struct resource_table *imx_rproc_get_loaded_rsc_table(struct rproc *rproc return (struct resource_table *)priv->rsc_table; }
+static struct resource_table * +imx_rproc_elf_find_loaded_rsc_table(struct rproc *rproc, const struct firmware *fw) +{ + struct imx_rproc *priv = rproc->priv; + + if (priv->rsc_table) + return (struct resource_table *)priv->rsc_table; + + return rproc_elf_find_loaded_rsc_table(rproc, fw); +} + static const struct rproc_ops imx_rproc_ops = { .prepare = imx_rproc_prepare, .attach = imx_rproc_attach, @@ -676,7 +687,7 @@ static const struct rproc_ops imx_rproc_ops = { .da_to_va = imx_rproc_da_to_va, .load = rproc_elf_load_segments, .parse_fw = imx_rproc_parse_fw, - .find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table, + .find_loaded_rsc_table = imx_rproc_elf_find_loaded_rsc_table, .get_loaded_rsc_table = imx_rproc_get_loaded_rsc_table, .sanity_check = rproc_elf_sanity_check, .get_boot_addr = rproc_elf_get_boot_addr,
From: Peng Fan peng.fan@nxp.com
[ Upstream commit a54c441b46a0745683c2eef5a359d22856d27323 ]
For i.MX7D DRAM related mux clock, the clock source change should ONLY be done done in low level asm code without accessing DRAM, and then calling clk API to sync the HW clock status with clk tree, it should never touch real clock source switch via clk API, so CLK_SET_PARENT_GATE flag should NOT be added, otherwise, DRAM's clock parent will be disabled when DRAM is active, and system will hang.
Signed-off-by: Peng Fan peng.fan@nxp.com Reviewed-by: Abel Vesa abel.vesa@linaro.org Link: https://lore.kernel.org/r/20240607133347.3291040-8-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa abel.vesa@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/imx/clk-imx7d.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c index 2b77d1fc7bb94..1e1296e748357 100644 --- a/drivers/clk/imx/clk-imx7d.c +++ b/drivers/clk/imx/clk-imx7d.c @@ -498,9 +498,9 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node) hws[IMX7D_ENET_AXI_ROOT_SRC] = imx_clk_hw_mux2_flags("enet_axi_src", base + 0x8900, 24, 3, enet_axi_sel, ARRAY_SIZE(enet_axi_sel), CLK_SET_PARENT_GATE); hws[IMX7D_NAND_USDHC_BUS_ROOT_SRC] = imx_clk_hw_mux2_flags("nand_usdhc_src", base + 0x8980, 24, 3, nand_usdhc_bus_sel, ARRAY_SIZE(nand_usdhc_bus_sel), CLK_SET_PARENT_GATE); hws[IMX7D_DRAM_PHYM_ROOT_SRC] = imx_clk_hw_mux2_flags("dram_phym_src", base + 0x9800, 24, 1, dram_phym_sel, ARRAY_SIZE(dram_phym_sel), CLK_SET_PARENT_GATE); - hws[IMX7D_DRAM_ROOT_SRC] = imx_clk_hw_mux2_flags("dram_src", base + 0x9880, 24, 1, dram_sel, ARRAY_SIZE(dram_sel), CLK_SET_PARENT_GATE); + hws[IMX7D_DRAM_ROOT_SRC] = imx_clk_hw_mux2("dram_src", base + 0x9880, 24, 1, dram_sel, ARRAY_SIZE(dram_sel)); hws[IMX7D_DRAM_PHYM_ALT_ROOT_SRC] = imx_clk_hw_mux2_flags("dram_phym_alt_src", base + 0xa000, 24, 3, dram_phym_alt_sel, ARRAY_SIZE(dram_phym_alt_sel), CLK_SET_PARENT_GATE); - hws[IMX7D_DRAM_ALT_ROOT_SRC] = imx_clk_hw_mux2_flags("dram_alt_src", base + 0xa080, 24, 3, dram_alt_sel, ARRAY_SIZE(dram_alt_sel), CLK_SET_PARENT_GATE); + hws[IMX7D_DRAM_ALT_ROOT_SRC] = imx_clk_hw_mux2("dram_alt_src", base + 0xa080, 24, 3, dram_alt_sel, ARRAY_SIZE(dram_alt_sel)); hws[IMX7D_USB_HSIC_ROOT_SRC] = imx_clk_hw_mux2_flags("usb_hsic_src", base + 0xa100, 24, 3, usb_hsic_sel, ARRAY_SIZE(usb_hsic_sel), CLK_SET_PARENT_GATE); hws[IMX7D_PCIE_CTRL_ROOT_SRC] = imx_clk_hw_mux2_flags("pcie_ctrl_src", base + 0xa180, 24, 3, pcie_ctrl_sel, ARRAY_SIZE(pcie_ctrl_sel), CLK_SET_PARENT_GATE); hws[IMX7D_PCIE_PHY_ROOT_SRC] = imx_clk_hw_mux2_flags("pcie_phy_src", base + 0xa200, 24, 3, pcie_phy_sel, ARRAY_SIZE(pcie_phy_sel), CLK_SET_PARENT_GATE);
From: Zhaoyang Huang zhaoyang.huang@unisoc.com
[ Upstream commit 03e02b94171b1985dd0aa184296fe94425b855a3 ]
This patch is inspired by a code review of fs codes which aims at folio's extra refcnt that could introduce unwanted behavious when judging refcnt, such as[1].That is, the folio passed to mapping_evict_folio carries the refcnts from find_lock_entries, page_cache, corresponding to PTEs and folio's private if has. However, current code doesn't take the refcnt for folio's private which could have mapping_evict_folio miss the one to only PTE and lead to call filemap_release_folio wrongly.
[1] long mapping_evict_folio(struct address_space *mapping, struct folio *folio) { ... //current code will misjudge here if there is one pte on the folio which is be deemed as the one as folio's private if (folio_ref_count(folio) > folio_nr_pages(folio) + folio_has_private(folio) + 1) return 0; if (!filemap_release_folio(folio, 0)) return 0;
return remove_mapping(mapping, folio); }
Signed-off-by: Zhaoyang Huang zhaoyang.huang@unisoc.com Signed-off-by: Anna Schumaker anna.schumaker@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/nfs/write.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d074d0ceb4f01..80c6ded5f74c6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -772,8 +772,7 @@ static void nfs_inode_add_request(struct nfs_page *req) nfs_lock_request(req); spin_lock(&mapping->i_private_lock); set_bit(PG_MAPPED, &req->wb_flags); - folio_set_private(folio); - folio->private = req; + folio_attach_private(folio, req); spin_unlock(&mapping->i_private_lock); atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an @@ -797,8 +796,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
spin_lock(&mapping->i_private_lock); if (likely(folio)) { - folio->private = NULL; - folio_clear_private(folio); + folio_detach_private(folio); clear_bit(PG_MAPPED, &req->wb_head->wb_flags); } spin_unlock(&mapping->i_private_lock);
From: Miklos Szeredi mszeredi@redhat.com
[ Upstream commit efad7153bf93db8565128f7567aab1d23e221098 ]
Only f_path is used from backing files registered with FUSE_DEV_IOC_BACKING_OPEN, so it makes sense to allow O_PATH descriptors.
O_PATH files have an empty f_op, so don't check read_iter/write_iter.
Reviewed-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/fuse/passthrough.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 9666d13884ce5..62aee8289d110 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -228,16 +228,13 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) if (map->flags || map->padding) goto out;
- file = fget(map->fd); + file = fget_raw(map->fd); res = -EBADF; if (!file) goto out;
- res = -EOPNOTSUPP; - if (!file->f_op->read_iter || !file->f_op->write_iter) - goto out_fput; - backing_sb = file_inode(file)->i_sb; + pr_info("%s: %x:%pD %i\n", __func__, backing_sb->s_dev, file, backing_sb->s_stack_depth); res = -ELOOP; if (backing_sb->s_stack_depth >= fc->max_stack_depth) goto out_fput;
On Fri, 4 Oct 2024 at 20:19, Sasha Levin sashal@kernel.org wrote:
From: Miklos Szeredi mszeredi@redhat.com
[ Upstream commit efad7153bf93db8565128f7567aab1d23e221098 ]
Only f_path is used from backing files registered with FUSE_DEV_IOC_BACKING_OPEN, so it makes sense to allow O_PATH descriptors.
O_PATH files have an empty f_op, so don't check read_iter/write_iter.
Reviewed-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/fuse/passthrough.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 9666d13884ce5..62aee8289d110 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -228,16 +228,13 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) if (map->flags || map->padding) goto out;
file = fget(map->fd);
file = fget_raw(map->fd); res = -EBADF; if (!file) goto out;
res = -EOPNOTSUPP;
if (!file->f_op->read_iter || !file->f_op->write_iter)
goto out_fput;
backing_sb = file_inode(file)->i_sb;
pr_info("%s: %x:%pD %i\n", __func__, backing_sb->s_dev, file, backing_sb->s_stack_depth);
That's a stray debug line that wasn't in there when I posted the patch for review[1], but somehow made it into the pull...
Since this isn't a bug fix, it would be easiest to just drop the patch from the stable queues.
But I'm okay with just dropping this stray line from the backport, or waiting for an upstream fix which does that.
Thanks, Miklos
[1] https://lore.kernel.org/all/20240913104703.1673180-1-mszeredi@redhat.com/
On Mon, Oct 07, 2024 at 12:15:45PM +0200, Miklos Szeredi wrote:
On Fri, 4 Oct 2024 at 20:19, Sasha Levin sashal@kernel.org wrote:
From: Miklos Szeredi mszeredi@redhat.com
[ Upstream commit efad7153bf93db8565128f7567aab1d23e221098 ]
Only f_path is used from backing files registered with FUSE_DEV_IOC_BACKING_OPEN, so it makes sense to allow O_PATH descriptors.
O_PATH files have an empty f_op, so don't check read_iter/write_iter.
Reviewed-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/fuse/passthrough.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 9666d13884ce5..62aee8289d110 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -228,16 +228,13 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) if (map->flags || map->padding) goto out;
file = fget(map->fd);
file = fget_raw(map->fd); res = -EBADF; if (!file) goto out;
res = -EOPNOTSUPP;
if (!file->f_op->read_iter || !file->f_op->write_iter)
goto out_fput;
backing_sb = file_inode(file)->i_sb;
pr_info("%s: %x:%pD %i\n", __func__, backing_sb->s_dev, file, backing_sb->s_stack_depth);
That's a stray debug line that wasn't in there when I posted the patch for review[1], but somehow made it into the pull...
Since this isn't a bug fix, it would be easiest to just drop the patch from the stable queues.
But I'm okay with just dropping this stray line from the backport, or waiting for an upstream fix which does that.
I'll just drop it, thanks!
From: Alexander Mikhalitsyn aleksandr.mikhalitsyn@canonical.com
[ Upstream commit 5b8ca5a54cb89ab07b0389f50e038e533cdfdd86 ]
This is needed to properly clear suid/sgid.
Signed-off-by: Alexander Mikhalitsyn aleksandr.mikhalitsyn@canonical.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/fuse/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ed76121f73f2e..536194d41b0b7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1398,6 +1398,7 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive) static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct mnt_idmap *idmap = file_mnt_idmap(file); struct address_space *mapping = file->f_mapping; ssize_t written = 0; struct inode *inode = mapping->host; @@ -1412,7 +1413,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) return err;
if (fc->handle_killpriv_v2 && - setattr_should_drop_suidgid(&nop_mnt_idmap, + setattr_should_drop_suidgid(idmap, file_inode(file))) { goto writethrough; }
From: "Jiri Slaby (SUSE)" jirislaby@kernel.org
[ Upstream commit 602babaa84d627923713acaf5f7e9a4369e77473 ]
Commit af224ca2df29 (serial: core: Prevent unsafe uart port access, part 3) added few uport == NULL checks. It added one to uart_shutdown(), so the commit assumes, uport can be NULL in there. But right after that protection, there is an unprotected "uart_port_dtr_rts(uport, false);" call. That is invoked only if HUPCL is set, so I assume that is the reason why we do not see lots of these reports.
Or it cannot be NULL at this point at all for some reason :P.
Until the above is investigated, stay on the safe side and move this dereference to the if too.
I got this inconsistency from Coverity under CID 1585130. Thanks.
Signed-off-by: Jiri Slaby (SUSE) jirislaby@kernel.org Cc: Peter Hurley peter@hurleysoftware.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Link: https://lore.kernel.org/r/20240805102046.307511-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/tty/serial/serial_core.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 5bea3af46abce..9763fc5dcc80c 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -407,14 +407,16 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) /* * Turn off DTR and RTS early. */ - if (uport && uart_console(uport) && tty) { - uport->cons->cflag = tty->termios.c_cflag; - uport->cons->ispeed = tty->termios.c_ispeed; - uport->cons->ospeed = tty->termios.c_ospeed; - } + if (uport) { + if (uart_console(uport) && tty) { + uport->cons->cflag = tty->termios.c_cflag; + uport->cons->ispeed = tty->termios.c_ispeed; + uport->cons->ospeed = tty->termios.c_ospeed; + }
- if (!tty || C_HUPCL(tty)) - uart_port_dtr_rts(uport, false); + if (!tty || C_HUPCL(tty)) + uart_port_dtr_rts(uport, false); + }
uart_port_shutdown(port); }
From: Wadim Egorov w.egorov@phytec.de
[ Upstream commit db63d9868f7f310de44ba7bea584e2454f8b4ed0 ]
In polling mode, if no IRQ was requested there is no need to free it. Call devm_free_irq() only if client->irq is set. This fixes the warning caused by the tps6598x module removal:
WARNING: CPU: 2 PID: 333 at kernel/irq/devres.c:144 devm_free_irq+0x80/0x8c ... ... Call trace: devm_free_irq+0x80/0x8c tps6598x_remove+0x28/0x88 [tps6598x] i2c_device_remove+0x2c/0x9c device_remove+0x4c/0x80 device_release_driver_internal+0x1cc/0x228 driver_detach+0x50/0x98 bus_remove_driver+0x6c/0xbc driver_unregister+0x30/0x60 i2c_del_driver+0x54/0x64 tps6598x_i2c_driver_exit+0x18/0xc3c [tps6598x] __arm64_sys_delete_module+0x184/0x264 invoke_syscall+0x48/0x110 el0_svc_common.constprop.0+0xc8/0xe8 do_el0_svc+0x20/0x2c el0_svc+0x28/0x98 el0t_64_sync_handler+0x13c/0x158 el0t_64_sync+0x190/0x194
Signed-off-by: Wadim Egorov w.egorov@phytec.de Reviewed-by: Heikki Krogerus heikki.krogerus@linux.intel.com Link: https://lore.kernel.org/r/20240816124150.608125-1-w.egorov@phytec.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/typec/tipd/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index dd51a25480bfb..256b0c054e9a9 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1465,8 +1465,9 @@ static void tps6598x_remove(struct i2c_client *client)
if (!client->irq) cancel_delayed_work_sync(&tps->wq_poll); + else + devm_free_irq(tps->dev, client->irq, tps);
- devm_free_irq(tps->dev, client->irq, tps); tps6598x_disconnect(tps, 0); typec_unregister_port(tps->port); usb_role_switch_put(tps->role_sw);
From: Heikki Krogerus heikki.krogerus@linux.intel.com
[ Upstream commit 1d05c382ddb4e43ce251a50f308df5e42a2f6088 ]
That may silently corrupt the data. Instead, failing attempts to read more than the interface can handle.
Signed-off-by: Heikki Krogerus heikki.krogerus@linux.intel.com Reviewed-by: Abhishek Pandit-Subedi abhishekpandit@chromium.org Link: https://lore.kernel.org/r/20240816135859.3499351-3-heikki.krogerus@linux.int... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/typec/ucsi/ucsi.c | 8 ++------ drivers/usb/typec/ucsi/ucsi.h | 2 ++ 2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 17155ed17fdf8..a8aeda77b1b6b 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -99,12 +99,8 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci,
*cci = 0;
- /* - * Below UCSI 2.0, MESSAGE_IN was limited to 16 bytes. Truncate the - * reads here. - */ - if (ucsi->version <= UCSI_VERSION_1_2) - size = clamp(size, 0, 16); + if (size > UCSI_MAX_DATA_LENGTH(ucsi)) + return -EINVAL;
ret = ucsi->ops->sync_control(ucsi, command); if (ret) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 5a3481d36d7ab..db90b513f78a8 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -435,6 +435,8 @@ struct ucsi { #define UCSI_DELAY_DEVICE_PDOS BIT(1) /* Reading PDOs fails until the parter is in PD mode */ };
+#define UCSI_MAX_DATA_LENGTH(u) (((u)->version < UCSI_VERSION_2_0) ? 0x10 : 0xff) + #define UCSI_MAX_SVID 5 #define UCSI_MAX_ALTMODES (UCSI_MAX_SVID * 6)
From: Xu Yang xu.yang_2@nxp.com
[ Upstream commit e4fdcc10092fb244218013bfe8ff01c55d54e8e4 ]
Currently, suspend interrupt is enabled before pullup enable operation. This will cause a suspend interrupt assert right after pullup DP. This suspend interrupt is meaningless, so this will ignore such interrupt by enable it after usb reset completed.
Signed-off-by: Xu Yang xu.yang_2@nxp.com Acked-by: Peter Chen peter.chen@kernel.org Link: https://lore.kernel.org/r/20240823073832.1702135-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/chipidea/udc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 2d7f616270c17..69ef3cd8d4f83 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -86,7 +86,7 @@ static int hw_device_state(struct ci_hdrc *ci, u32 dma) hw_write(ci, OP_ENDPTLISTADDR, ~0, dma); /* interrupt, error, port change, reset, sleep/suspend */ hw_write(ci, OP_USBINTR, ~0, - USBi_UI|USBi_UEI|USBi_PCI|USBi_URI|USBi_SLI); + USBi_UI|USBi_UEI|USBi_PCI|USBi_URI); } else { hw_write(ci, OP_USBINTR, ~0, 0); } @@ -877,6 +877,7 @@ __releases(ci->lock) __acquires(ci->lock) { int retval; + u32 intr;
spin_unlock(&ci->lock); if (ci->gadget.speed != USB_SPEED_UNKNOWN) @@ -890,6 +891,11 @@ __acquires(ci->lock) if (retval) goto done;
+ /* clear SLI */ + hw_write(ci, OP_USBSTS, USBi_SLI, USBi_SLI); + intr = hw_read(ci, OP_USBINTR, ~0); + hw_write(ci, OP_USBINTR, ~0, intr | USBi_SLI); + ci->status = usb_ep_alloc_request(&ci->ep0in->ep, GFP_ATOMIC); if (ci->status == NULL) retval = -ENOMEM;
From: Shawn Shao shawn.shao@jaguarmicro.com
[ Upstream commit 4058c39bd176daf11a826802d940d86292a6b02b ]
The issue is that before entering the crash kernel, the DWC USB controller did not perform operations such as resetting the interrupt mask bits. After entering the crash kernel,before the USB interrupt handler registration was completed while loading the DWC USB driver,an GINTSTS_SOF interrupt was received.This triggered the misroute_irq process within the GIC handling framework,ultimately leading to the misrouting of the interrupt,causing it to be handled by the wrong interrupt handler and resulting in the issue.
Summary:In a scenario where the kernel triggers a panic and enters the crash kernel,it is necessary to ensure that the interrupt mask bit is not enabled before the interrupt registration is complete. If an interrupt reaches the CPU at this moment,it will certainly not be handled correctly,especially in cases where this interrupt is reported frequently.
Please refer to the Crashkernel dmesg information as follows (the message on line 3 was added before devm_request_irq is called by the dwc2_driver_probe function): [ 5.866837][ T1] dwc2 JMIC0010:01: supply vusb_d not found, using dummy regulator [ 5.874588][ T1] dwc2 JMIC0010:01: supply vusb_a not found, using dummy regulator [ 5.882335][ T1] dwc2 JMIC0010:01: before devm_request_irq irq: [71], gintmsk[0xf300080e], gintsts[0x04200009] [ 5.892686][ C0] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.0-jmnd1.2_RC #18 [ 5.900327][ C0] Hardware name: CMSS HyperCard4-25G/HyperCard4-25G, BIOS 1.6.4 Jul 8 2024 [ 5.908836][ C0] Call trace: [ 5.911965][ C0] dump_backtrace+0x0/0x1f0 [ 5.916308][ C0] show_stack+0x20/0x30 [ 5.920304][ C0] dump_stack+0xd8/0x140 [ 5.924387][ C0] pcie_xxx_handler+0x3c/0x1d8 [ 5.930121][ C0] __handle_irq_event_percpu+0x64/0x1e0 [ 5.935506][ C0] handle_irq_event+0x80/0x1d0 [ 5.940109][ C0] try_one_irq+0x138/0x174 [ 5.944365][ C0] misrouted_irq+0x134/0x140 [ 5.948795][ C0] note_interrupt+0x1d0/0x30c [ 5.953311][ C0] handle_irq_event+0x13c/0x1d0 [ 5.958001][ C0] handle_fasteoi_irq+0xd4/0x260 [ 5.962779][ C0] __handle_domain_irq+0x88/0xf0 [ 5.967555][ C0] gic_handle_irq+0x9c/0x2f0 [ 5.971985][ C0] el1_irq+0xb8/0x140 [ 5.975807][ C0] __setup_irq+0x3dc/0x7cc [ 5.980064][ C0] request_threaded_irq+0xf4/0x1b4 [ 5.985015][ C0] devm_request_threaded_irq+0x80/0x100 [ 5.990400][ C0] dwc2_driver_probe+0x1b8/0x6b0 [ 5.995178][ C0] platform_drv_probe+0x5c/0xb0 [ 5.999868][ C0] really_probe+0xf8/0x51c [ 6.004125][ C0] driver_probe_device+0xfc/0x170 [ 6.008989][ C0] device_driver_attach+0xc8/0xd0 [ 6.013853][ C0] __driver_attach+0xe8/0x1b0 [ 6.018369][ C0] bus_for_each_dev+0x7c/0xdc [ 6.022886][ C0] driver_attach+0x2c/0x3c [ 6.027143][ C0] bus_add_driver+0xdc/0x240 [ 6.031573][ C0] driver_register+0x80/0x13c [ 6.036090][ C0] __platform_driver_register+0x50/0x5c [ 6.041476][ C0] dwc2_platform_driver_init+0x24/0x30 [ 6.046774][ C0] do_one_initcall+0x50/0x25c [ 6.051291][ C0] do_initcall_level+0xe4/0xfc [ 6.055894][ C0] do_initcalls+0x80/0xa4 [ 6.060064][ C0] kernel_init_freeable+0x198/0x240 [ 6.065102][ C0] kernel_init+0x1c/0x12c
Signed-off-by: Shawn Shao shawn.shao@jaguarmicro.com Link: https://lore.kernel.org/r/20240830031709.134-1-shawn.shao@jaguarmicro.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/dwc2/platform.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 7b84416dfc2b1..c1b7209b94836 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -469,18 +469,6 @@ static int dwc2_driver_probe(struct platform_device *dev)
spin_lock_init(&hsotg->lock);
- hsotg->irq = platform_get_irq(dev, 0); - if (hsotg->irq < 0) - return hsotg->irq; - - dev_dbg(hsotg->dev, "registering common handler for irq%d\n", - hsotg->irq); - retval = devm_request_irq(hsotg->dev, hsotg->irq, - dwc2_handle_common_intr, IRQF_SHARED, - dev_name(hsotg->dev), hsotg); - if (retval) - return retval; - hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus"); if (IS_ERR(hsotg->vbus_supply)) { retval = PTR_ERR(hsotg->vbus_supply); @@ -524,6 +512,20 @@ static int dwc2_driver_probe(struct platform_device *dev) if (retval) goto error;
+ hsotg->irq = platform_get_irq(dev, 0); + if (hsotg->irq < 0) { + retval = hsotg->irq; + goto error; + } + + dev_dbg(hsotg->dev, "registering common handler for irq%d\n", + hsotg->irq); + retval = devm_request_irq(hsotg->dev, hsotg->irq, + dwc2_handle_common_intr, IRQF_SHARED, + dev_name(hsotg->dev), hsotg); + if (retval) + goto error; + /* * For OTG cores, set the force mode bits to reflect the value * of dr_mode. Force mode bits should not be touched at any
From: Mathias Nyman mathias.nyman@linux.intel.com
[ Upstream commit 9044ad57b60b0556d42b6f8aa218a68865e810a4 ]
Don't flush all pending DbC data requests when an endpoint halts.
An endpoint may halt and xHC DbC triggers a STALL error event if there's an issue with a bulk data transfer. The transfer should restart once xHC DbC receives a ClearFeature(ENDPOINT_HALT) request from the host.
Once xHC DbC restarts it will start from the TRB pointed to by dequeue field in the endpoint context, which might be the same TRB we got the STALL event for. Turn the TRB to a no-op in this case to make sure xHC DbC doesn't reuse and tries to retransmit this same TRB after we already handled it, and gave its corresponding data request back.
Other STALL events might be completely bogus. Lukasz Bartosik discovered that xHC DbC might issue spurious STALL events if hosts sends a ClearFeature(ENDPOINT_HALT) request to non-halted endpoints even without any active bulk transfers.
Assume STALL event is spurious if it reports 0 bytes transferred, and the endpoint stopped on the STALLED TRB. Don't give back the data request corresponding to the TRB in this case.
The halted status is per endpoint. Track it with a per endpoint flag instead of the driver invented DbC wide DS_STALLED state. DbC remains in DbC-Configured state even if endpoints halt. There is no Stalled state in the DbC Port state Machine (xhci section 7.6.6)
Reported-by: Łukasz Bartosik ukaszb@chromium.org Closes: https://lore.kernel.org/linux-usb/20240725074857.623299-1-ukaszb@chromium.or... Tested-by: Łukasz Bartosik ukaszb@chromium.org Signed-off-by: Mathias Nyman mathias.nyman@linux.intel.com Link: https://lore.kernel.org/r/20240905143300.1959279-2-mathias.nyman@linux.intel... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/host/xhci-dbgcap.c | 133 ++++++++++++++++++++------------- drivers/usb/host/xhci-dbgcap.h | 2 +- 2 files changed, 83 insertions(+), 52 deletions(-)
diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 161c09953c4e0..241d7aa1fbc20 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -173,16 +173,18 @@ static void xhci_dbc_giveback(struct dbc_request *req, int status) spin_lock(&dbc->lock); }
-static void xhci_dbc_flush_single_request(struct dbc_request *req) +static void trb_to_noop(union xhci_trb *trb) { - union xhci_trb *trb = req->trb; - trb->generic.field[0] = 0; trb->generic.field[1] = 0; trb->generic.field[2] = 0; trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE); trb->generic.field[3] |= cpu_to_le32(TRB_TYPE(TRB_TR_NOOP)); +}
+static void xhci_dbc_flush_single_request(struct dbc_request *req) +{ + trb_to_noop(req->trb); xhci_dbc_giveback(req, -ESHUTDOWN); }
@@ -649,7 +651,6 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc) case DS_DISABLED: return; case DS_CONFIGURED: - case DS_STALLED: if (dbc->driver->disconnect) dbc->driver->disconnect(dbc); break; @@ -669,6 +670,23 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc) pm_runtime_put_sync(dbc->dev); /* note, was self.controller */ }
+static void +handle_ep_halt_changes(struct xhci_dbc *dbc, struct dbc_ep *dep, bool halted) +{ + if (halted) { + dev_info(dbc->dev, "DbC Endpoint halted\n"); + dep->halted = 1; + + } else if (dep->halted) { + dev_info(dbc->dev, "DbC Endpoint halt cleared\n"); + dep->halted = 0; + + if (!list_empty(&dep->list_pending)) + writel(DBC_DOOR_BELL_TARGET(dep->direction), + &dbc->regs->doorbell); + } +} + static void dbc_handle_port_status(struct xhci_dbc *dbc, union xhci_trb *event) { @@ -697,6 +715,7 @@ static void dbc_handle_xfer_event(struct xhci_dbc *dbc, union xhci_trb *event) struct xhci_ring *ring; int ep_id; int status; + struct xhci_ep_ctx *ep_ctx; u32 comp_code; size_t remain_length; struct dbc_request *req = NULL, *r; @@ -706,8 +725,30 @@ static void dbc_handle_xfer_event(struct xhci_dbc *dbc, union xhci_trb *event) ep_id = TRB_TO_EP_ID(le32_to_cpu(event->generic.field[3])); dep = (ep_id == EPID_OUT) ? get_out_ep(dbc) : get_in_ep(dbc); + ep_ctx = (ep_id == EPID_OUT) ? + dbc_bulkout_ctx(dbc) : dbc_bulkin_ctx(dbc); ring = dep->ring;
+ /* Match the pending request: */ + list_for_each_entry(r, &dep->list_pending, list_pending) { + if (r->trb_dma == event->trans_event.buffer) { + req = r; + break; + } + if (r->status == -COMP_STALL_ERROR) { + dev_warn(dbc->dev, "Give back stale stalled req\n"); + ring->num_trbs_free++; + xhci_dbc_giveback(r, 0); + } + } + + if (!req) { + dev_warn(dbc->dev, "no matched request\n"); + return; + } + + trace_xhci_dbc_handle_transfer(ring, &req->trb->generic); + switch (comp_code) { case COMP_SUCCESS: remain_length = 0; @@ -718,31 +759,49 @@ static void dbc_handle_xfer_event(struct xhci_dbc *dbc, union xhci_trb *event) case COMP_TRB_ERROR: case COMP_BABBLE_DETECTED_ERROR: case COMP_USB_TRANSACTION_ERROR: - case COMP_STALL_ERROR: dev_warn(dbc->dev, "tx error %d detected\n", comp_code); status = -comp_code; break; + case COMP_STALL_ERROR: + dev_warn(dbc->dev, "Stall error at bulk TRB %llx, remaining %zu, ep deq %llx\n", + event->trans_event.buffer, remain_length, ep_ctx->deq); + status = 0; + dep->halted = 1; + + /* + * xHC DbC may trigger a STALL bulk xfer event when host sends a + * ClearFeature(ENDPOINT_HALT) request even if there wasn't an + * active bulk transfer. + * + * Don't give back this transfer request as hardware will later + * start processing TRBs starting from this 'STALLED' TRB, + * causing TRBs and requests to be out of sync. + * + * If STALL event shows some bytes were transferred then assume + * it's an actual transfer issue and give back the request. + * In this case mark the TRB as No-Op to avoid hw from using the + * TRB again. + */ + + if ((ep_ctx->deq & ~TRB_CYCLE) == event->trans_event.buffer) { + dev_dbg(dbc->dev, "Ep stopped on Stalled TRB\n"); + if (remain_length == req->length) { + dev_dbg(dbc->dev, "Spurious stall event, keep req\n"); + req->status = -COMP_STALL_ERROR; + req->actual = 0; + return; + } + dev_dbg(dbc->dev, "Give back stalled req, but turn TRB to No-op\n"); + trb_to_noop(req->trb); + } + break; + default: dev_err(dbc->dev, "unknown tx error %d\n", comp_code); status = -comp_code; break; }
- /* Match the pending request: */ - list_for_each_entry(r, &dep->list_pending, list_pending) { - if (r->trb_dma == event->trans_event.buffer) { - req = r; - break; - } - } - - if (!req) { - dev_warn(dbc->dev, "no matched request\n"); - return; - } - - trace_xhci_dbc_handle_transfer(ring, &req->trb->generic); - ring->num_trbs_free++; req->actual = req->length - remain_length; xhci_dbc_giveback(req, status); @@ -762,7 +821,6 @@ static void inc_evt_deq(struct xhci_ring *ring) static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) { dma_addr_t deq; - struct dbc_ep *dep; union xhci_trb *evt; u32 ctrl, portsc; bool update_erdp = false; @@ -814,43 +872,17 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) return EVT_DISC; }
- /* Handle endpoint stall event: */ + /* Check and handle changes in endpoint halt status */ ctrl = readl(&dbc->regs->control); - if ((ctrl & DBC_CTRL_HALT_IN_TR) || - (ctrl & DBC_CTRL_HALT_OUT_TR)) { - dev_info(dbc->dev, "DbC Endpoint stall\n"); - dbc->state = DS_STALLED; - - if (ctrl & DBC_CTRL_HALT_IN_TR) { - dep = get_in_ep(dbc); - xhci_dbc_flush_endpoint_requests(dep); - } - - if (ctrl & DBC_CTRL_HALT_OUT_TR) { - dep = get_out_ep(dbc); - xhci_dbc_flush_endpoint_requests(dep); - } - - return EVT_DONE; - } + handle_ep_halt_changes(dbc, get_in_ep(dbc), ctrl & DBC_CTRL_HALT_IN_TR); + handle_ep_halt_changes(dbc, get_out_ep(dbc), ctrl & DBC_CTRL_HALT_OUT_TR);
/* Clear DbC run change bit: */ if (ctrl & DBC_CTRL_DBC_RUN_CHANGE) { writel(ctrl, &dbc->regs->control); ctrl = readl(&dbc->regs->control); } - break; - case DS_STALLED: - ctrl = readl(&dbc->regs->control); - if (!(ctrl & DBC_CTRL_HALT_IN_TR) && - !(ctrl & DBC_CTRL_HALT_OUT_TR) && - (ctrl & DBC_CTRL_DBC_RUN)) { - dbc->state = DS_CONFIGURED; - break; - } - - return EVT_DONE; default: dev_err(dbc->dev, "Unknown DbC state %d\n", dbc->state); break; @@ -939,7 +971,6 @@ static const char * const dbc_state_strings[DS_MAX] = { [DS_ENABLED] = "enabled", [DS_CONNECTED] = "connected", [DS_CONFIGURED] = "configured", - [DS_STALLED] = "stalled", };
static ssize_t dbc_show(struct device *dev, diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index 0118c6288a3cc..97c5dc290138b 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -81,7 +81,6 @@ enum dbc_state { DS_ENABLED, DS_CONNECTED, DS_CONFIGURED, - DS_STALLED, DS_MAX };
@@ -90,6 +89,7 @@ struct dbc_ep { struct list_head list_pending; struct xhci_ring *ring; unsigned int direction:1; + unsigned int halted:1; };
#define DBC_QUEUE_SIZE 16
From: Frank Li Frank.Li@nxp.com
[ Upstream commit a6cd2b3fa89468b5f28b83d211bd25cc589f9eba ]
Parse software managed property 'xhci-skip-phy-init-quirk' and 'xhci-skip-phy-init-quirk' to apply related quirk. It allows usb glue layer driver apply these quirk.
Signed-off-by: Frank Li Frank.Li@nxp.com Link: https://lore.kernel.org/r/20240906-dwc-mp-v5-1-ea8ec6774e7b@nxp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/host/xhci-plat.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 31bdfa52eeb25..ecaa75718e592 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -259,6 +259,12 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s if (device_property_read_bool(tmpdev, "write-64-hi-lo-quirk")) xhci->quirks |= XHCI_WRITE_64_HI_LO;
+ if (device_property_read_bool(tmpdev, "xhci-missing-cas-quirk")) + xhci->quirks |= XHCI_MISSING_CAS; + + if (device_property_read_bool(tmpdev, "xhci-skip-phy-init-quirk")) + xhci->quirks |= XHCI_SKIP_PHY_INIT; + device_property_read_u32(tmpdev, "imod-interval-ns", &xhci->imod_interval); }
From: Ruffalo Lavoisier ruffalolavoisier@gmail.com
[ Upstream commit 5baeb157b341b1d26a5815aeaa4d3bb9e0444fda ]
- After fopen check NULL before using the file pointer use
Signed-off-by: Ruffalo Lavoisier RuffaloLavoisier@gmail.com Link: https://lore.kernel.org/r/20240906203025.89588-1-RuffaloLavoisier@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/comedi/drivers/ni_routing/tools/convert_c_to_py.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/comedi/drivers/ni_routing/tools/convert_c_to_py.c b/drivers/comedi/drivers/ni_routing/tools/convert_c_to_py.c index d55521b5bdcb2..892a66b2cea66 100644 --- a/drivers/comedi/drivers/ni_routing/tools/convert_c_to_py.c +++ b/drivers/comedi/drivers/ni_routing/tools/convert_c_to_py.c @@ -140,6 +140,11 @@ int main(void) { FILE *fp = fopen("ni_values.py", "w");
+ if (fp == NULL) { + fprintf(stderr, "Could not open file!"); + return -1; + } + /* write route register values */ fprintf(fp, "ni_route_values = {\n"); for (int i = 0; ni_all_route_values[i]; ++i)
From: Wentao Guan guanwentao@uniontech.com
[ Upstream commit 5016c3a31a6d74eaf2fdfdec673eae8fcf90379e ]
Add kfree(root_ops) in this case to avoid memleak of root_ops, leaks when pci_find_bus() != 0.
Signed-off-by: Yuli Wang wangyuli@uniontech.com Signed-off-by: Wentao Guan guanwentao@uniontech.com Signed-off-by: Huacai Chen chenhuacai@loongson.cn Signed-off-by: Sasha Levin sashal@kernel.org --- arch/loongarch/pci/acpi.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 365f7de771cbb..1da4dc46df43e 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -225,6 +225,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) if (bus) { memcpy(bus->sysdata, info->cfg, sizeof(struct pci_config_window)); kfree(info); + kfree(root_ops); } else { struct pci_bus *child;
From: Florian Westphal fw@strlen.de
[ Upstream commit d8f84a9bc7c4e07fdc4edc00f9e868b8db974ccb ]
A conntrack entry can be inserted to the connection tracking table if there is no existing entry with an identical tuple in either direction.
Example: INITIATOR -> NAT/PAT -> RESPONDER
Initiator passes through NAT/PAT ("us") and SNAT is done (saddr rewrite). Then, later, NAT/PAT machine itself also wants to connect to RESPONDER.
This will not work if the SNAT done earlier has same IP:PORT source pair.
Conntrack table has: ORIGINAL: $IP_INITATOR:$SPORT -> $IP_RESPONDER:$DPORT REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT
and new locally originating connection wants: ORIGINAL: $IP_NAT:$SPORT -> $IP_RESPONDER:$DPORT REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT
This is handled by the NAT engine which will do a source port reallocation for the locally originating connection that is colliding with an existing tuple by attempting a source port rewrite.
This is done even if this new connection attempt did not go through a masquerade/snat rule.
There is a rare race condition with connection-less protocols like UDP, where we do the port reallocation even though its not needed.
This happens when new packets from the same, pre-existing flow are received in both directions at the exact same time on different CPUs after the conntrack table was flushed (or conntrack becomes active for first time).
With strict ordering/single cpu, the first packet creates new ct entry and second packet is resolved as established reply packet.
With parallel processing, both packets are picked up as new and both get their own ct entry.
In this case, the 'reply' packet (picked up as ORIGINAL) can be mangled by NAT engine because a port collision is detected.
This change isn't enough to prevent a packet drop later during nf_conntrack_confirm(), the existing clash resolution strategy will not detect such reverse clash case. This is resolved by a followup patch.
Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/nf_nat_core.c | 120 +++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 016c816d91cbc..c212b1b137222 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -183,7 +183,35 @@ hash_by_src(const struct net *net, return reciprocal_scale(hash, nf_nat_htable_size); }
-/* Is this tuple already taken? (not by us) */ +/** + * nf_nat_used_tuple - check if proposed nat tuple clashes with existing entry + * @tuple: proposed NAT binding + * @ignored_conntrack: our (unconfirmed) conntrack entry + * + * A conntrack entry can be inserted to the connection tracking table + * if there is no existing entry with an identical tuple in either direction. + * + * Example: + * INITIATOR -> NAT/PAT -> RESPONDER + * + * INITIATOR passes through NAT/PAT ("us") and SNAT is done (saddr rewrite). + * Then, later, NAT/PAT itself also connects to RESPONDER. + * + * This will not work if the SNAT done earlier has same IP:PORT source pair. + * + * Conntrack table has: + * ORIGINAL: $IP_INITIATOR:$SPORT -> $IP_RESPONDER:$DPORT + * REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT + * + * and new locally originating connection wants: + * ORIGINAL: $IP_NAT:$SPORT -> $IP_RESPONDER:$DPORT + * REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT + * + * ... which would mean incoming packets cannot be distinguished between + * the existing and the newly added entry (identical IP_CT_DIR_REPLY tuple). + * + * @return: true if the proposed NAT mapping collides with an existing entry. + */ static int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) @@ -200,6 +228,94 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, return nf_conntrack_tuple_taken(&reply, ignored_conntrack); }
+static bool nf_nat_allow_clash(const struct nf_conn *ct) +{ + return nf_ct_l4proto_find(nf_ct_protonum(ct))->allow_clash; +} + +/** + * nf_nat_used_tuple_new - check if to-be-inserted conntrack collides with existing entry + * @tuple: proposed NAT binding + * @ignored_ct: our (unconfirmed) conntrack entry + * + * Same as nf_nat_used_tuple, but also check for rare clash in reverse + * direction. Should be called only when @tuple has not been altered, i.e. + * @ignored_conntrack will not be subject to NAT. + * + * @return: true if the proposed NAT mapping collides with existing entry. + */ +static noinline bool +nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_ct) +{ + static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT; + const struct nf_conntrack_tuple_hash *thash; + const struct nf_conntrack_zone *zone; + struct nf_conn *ct; + bool taken = true; + struct net *net; + + if (!nf_nat_used_tuple(tuple, ignored_ct)) + return false; + + if (!nf_nat_allow_clash(ignored_ct)) + return true; + + /* Initial choice clashes with existing conntrack. + * Check for (rare) reverse collision. + * + * This can happen when new packets are received in both directions + * at the exact same time on different CPUs. + * + * Without SMP, first packet creates new conntrack entry and second + * packet is resolved as established reply packet. + * + * With parallel processing, both packets could be picked up as + * new and both get their own ct entry allocated. + * + * If ignored_conntrack and colliding ct are not subject to NAT then + * pretend the tuple is available and let later clash resolution + * handle this at insertion time. + * + * Without it, the 'reply' packet has its source port rewritten + * by nat engine. + */ + if (READ_ONCE(ignored_ct->status) & uses_nat) + return true; + + net = nf_ct_net(ignored_ct); + zone = nf_ct_zone(ignored_ct); + + thash = nf_conntrack_find_get(net, zone, tuple); + if (unlikely(!thash)) /* clashing entry went away */ + return false; + + ct = nf_ct_tuplehash_to_ctrack(thash); + + /* NB: IP_CT_DIR_ORIGINAL should be impossible because + * nf_nat_used_tuple() handles origin collisions. + * + * Handle remote chance other CPU confirmed its ct right after. + */ + if (thash->tuple.dst.dir != IP_CT_DIR_REPLY) + goto out; + + /* clashing connection subject to NAT? Retry with new tuple. */ + if (READ_ONCE(ct->status) & uses_nat) + goto out; + + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) && + nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) { + taken = false; + goto out; + } +out: + nf_ct_put(ct); + return taken; +} + static bool nf_nat_may_kill(struct nf_conn *ct, unsigned long flags) { static const unsigned long flags_refuse = IPS_FIXED_TIMEOUT | @@ -611,7 +727,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (nf_in_range(orig_tuple, range)) { - if (!nf_nat_used_tuple(orig_tuple, ct)) { + if (!nf_nat_used_tuple_new(orig_tuple, ct)) { *tuple = *orig_tuple; return; }
From: Simon Horman horms@kernel.org
[ Upstream commit fc56878ca1c288e49b5cbb43860a5938e3463654 ]
If CONFIG_BRIDGE_NETFILTER is not enabled, which is the case for x86_64 defconfig, then building nf_reject_ipv4.c and nf_reject_ipv6.c with W=1 using gcc-14 results in the following warnings, which are treated as errors:
net/ipv4/netfilter/nf_reject_ipv4.c: In function 'nf_send_reset': net/ipv4/netfilter/nf_reject_ipv4.c:243:23: error: variable 'niph' set but not used [-Werror=unused-but-set-variable] 243 | struct iphdr *niph; | ^~~~ cc1: all warnings being treated as errors net/ipv6/netfilter/nf_reject_ipv6.c: In function 'nf_send_reset6': net/ipv6/netfilter/nf_reject_ipv6.c:286:25: error: variable 'ip6h' set but not used [-Werror=unused-but-set-variable] 286 | struct ipv6hdr *ip6h; | ^~~~ cc1: all warnings being treated as errors
Address this by reducing the scope of these local variables to where they are used, which is code only compiled when CONFIG_BRIDGE_NETFILTER enabled.
Compile tested and run through netfilter selftests.
Reported-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Closes: https://lore.kernel.org/netfilter-devel/20240906145513.567781-1-andriy.shevc... Signed-off-by: Simon Horman horms@kernel.org Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/ipv4/netfilter/nf_reject_ipv4.c | 10 ++++------ net/ipv6/netfilter/nf_reject_ipv6.c | 5 ++--- 2 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 04504b2b51df5..87fd945a0d27a 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -239,9 +239,8 @@ static int nf_reject_fill_skb_dst(struct sk_buff *skb_in) void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, int hook) { - struct sk_buff *nskb; - struct iphdr *niph; const struct tcphdr *oth; + struct sk_buff *nskb; struct tcphdr _oth;
oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); @@ -266,14 +265,12 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
skb_reserve(nskb, LL_MAX_HEADER); - niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); if (ip_route_me_harder(net, sk, nskb, RTN_UNSPEC)) goto free_nskb;
- niph = ip_hdr(nskb); - /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; @@ -290,6 +287,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, */ if (nf_bridge_info_exists(oldskb)) { struct ethhdr *oeth = eth_hdr(oldskb); + struct iphdr *niph = ip_hdr(nskb); struct net_device *br_indev;
br_indev = nf_bridge_get_physindev(oldskb, net); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index dedee264b8f6c..69a78550261fb 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -283,7 +283,6 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, const struct tcphdr *otcph; unsigned int otcplen, hh_len; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; struct dst_entry *dst = NULL; struct flowi6 fl6;
@@ -339,8 +338,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, nskb->mark = fl6.flowi6_mark;
skb_reserve(nskb, hh_len + dst->header_len); - ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - ip6_dst_hoplimit(dst)); + nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst)); nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
nf_ct_attach(nskb, oldskb); @@ -355,6 +353,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, */ if (nf_bridge_info_exists(oldskb)) { struct ethhdr *oeth = eth_hdr(oldskb); + struct ipv6hdr *ip6h = ipv6_hdr(nskb); struct net_device *br_indev;
br_indev = nf_bridge_get_physindev(oldskb, net);
From: Philip Chen philipchen@chromium.org
[ Upstream commit e25fbcd97cf52c3c9824d44b5c56c19673c3dd50 ]
If a pmem device is in a bad status, the driver side could wait for host ack forever in virtio_pmem_flush(), causing the system to hang.
So add a status check in the beginning of virtio_pmem_flush() to return early if the device is not activated.
Signed-off-by: Philip Chen philipchen@chromium.org Message-Id: 20240826215313.2673566-1-philipchen@chromium.org Signed-off-by: Michael S. Tsirkin mst@redhat.com Acked-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvdimm/nd_virtio.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c index 35c8fbbba10ed..f55d60922b87d 100644 --- a/drivers/nvdimm/nd_virtio.c +++ b/drivers/nvdimm/nd_virtio.c @@ -44,6 +44,15 @@ static int virtio_pmem_flush(struct nd_region *nd_region) unsigned long flags; int err, err1;
+ /* + * Don't bother to submit the request to the device if the device is + * not activated. + */ + if (vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_NEEDS_RESET) { + dev_info(&vdev->dev, "virtio pmem device needs a reset\n"); + return -EIO; + } + might_sleep(); req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); if (!req_data)
From: Zhu Jun zhujun2@cmss.chinamobile.com
[ Upstream commit 3c6b818b097dd6932859bcc3d6722a74ec5931c1 ]
Added a check to handle memory allocation failure for `trigger_name` and return `-ENOMEM`.
Signed-off-by: Zhu Jun zhujun2@cmss.chinamobile.com Link: https://patch.msgid.link/20240828093129.3040-1-zhujun2@cmss.chinamobile.com Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/iio/iio_generic_buffer.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 0d0a7a19d6f95..9ef5ee087eda3 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -498,6 +498,10 @@ int main(int argc, char **argv) return -ENOMEM; } trigger_name = malloc(IIO_MAX_NAME_LENGTH); + if (!trigger_name) { + ret = -ENOMEM; + goto error; + } ret = read_sysfs_string("name", trig_dev_name, trigger_name); free(trig_dev_name); if (ret < 0) {
From: Riyan Dhiman riyandhiman14@gmail.com
[ Upstream commit a8a8b54350229f59c8ba6496fb5689a1632a59be ]
The geoid is a module parameter that allows users to hardcode the slot number. A bound check for geoid was added in the probe function because only values between 0 and less than VME_MAX_SLOT are valid.
Signed-off-by: Riyan Dhiman riyandhiman14@gmail.com Reviewed-by: Dan Carpenter dan.carpenter@linaro.org Link: https://lore.kernel.org/r/20240827125604.42771-2-riyandhiman14@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/vme_user/vme_fake.c | 6 ++++++ drivers/staging/vme_user/vme_tsi148.c | 6 ++++++ 2 files changed, 12 insertions(+)
diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c index 7f84d1c86f291..c4fb2b65154c7 100644 --- a/drivers/staging/vme_user/vme_fake.c +++ b/drivers/staging/vme_user/vme_fake.c @@ -1059,6 +1059,12 @@ static int __init fake_init(void) struct vme_slave_resource *slave_image; struct vme_lm_resource *lm;
+ if (geoid < 0 || geoid >= VME_MAX_SLOTS) { + pr_err("VME geographical address must be between 0 and %d (exclusive), but got %d\n", + VME_MAX_SLOTS, geoid); + return -EINVAL; + } + /* We need a fake parent device */ vme_root = root_device_register("vme"); if (IS_ERR(vme_root)) diff --git a/drivers/staging/vme_user/vme_tsi148.c b/drivers/staging/vme_user/vme_tsi148.c index 2ec9c29044041..e40ca4870d704 100644 --- a/drivers/staging/vme_user/vme_tsi148.c +++ b/drivers/staging/vme_user/vme_tsi148.c @@ -2253,6 +2253,12 @@ static int tsi148_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct vme_dma_resource *dma_ctrlr; struct vme_lm_resource *lm;
+ if (geoid < 0 || geoid >= VME_MAX_SLOTS) { + dev_err(&pdev->dev, "VME geographical address must be between 0 and %d (exclusive), but got %d\n", + VME_MAX_SLOTS, geoid); + return -EINVAL; + } + /* If we want to support more than one of each bridge, we need to * dynamically generate this so we get one per device */
From: Abhishek Tamboli abhishektamboli9@gmail.com
[ Upstream commit a7bb96b18864225a694e3887ac2733159489e4b0 ]
Fix potential dereferencing of ERR_PTR() in find_format_by_pix() and uvc_v4l2_enum_format().
Fix the following smatch errors:
drivers/usb/gadget/function/uvc_v4l2.c:124 find_format_by_pix() error: 'fmtdesc' dereferencing possible ERR_PTR()
drivers/usb/gadget/function/uvc_v4l2.c:392 uvc_v4l2_enum_format() error: 'fmtdesc' dereferencing possible ERR_PTR()
Also, fix similar issue in uvc_v4l2_try_format() for potential dereferencing of ERR_PTR().
Signed-off-by: Abhishek Tamboli abhishektamboli9@gmail.com Link: https://lore.kernel.org/r/20240815102202.594812-1-abhishektamboli9@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/gadget/function/uvc_v4l2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index a024aecb76dc3..de1736f834e6b 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -121,6 +121,9 @@ static struct uvcg_format *find_format_by_pix(struct uvc_device *uvc, list_for_each_entry(format, &uvc->header->formats, entry) { const struct uvc_format_desc *fmtdesc = to_uvc_format(format->fmt);
+ if (IS_ERR(fmtdesc)) + continue; + if (fmtdesc->fcc == pixelformat) { uformat = format->fmt; break; @@ -240,6 +243,7 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) struct uvc_video *video = &uvc->video; struct uvcg_format *uformat; struct uvcg_frame *uframe; + const struct uvc_format_desc *fmtdesc; u8 *fcc;
if (fmt->type != video->queue.queue.type) @@ -277,7 +281,10 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) fmt->fmt.pix.height = uframe->frame.w_height; fmt->fmt.pix.bytesperline = uvc_v4l2_get_bytesperline(uformat, uframe); fmt->fmt.pix.sizeimage = uvc_get_frame_size(uformat, uframe); - fmt->fmt.pix.pixelformat = to_uvc_format(uformat)->fcc; + fmtdesc = to_uvc_format(uformat); + if (IS_ERR(fmtdesc)) + return PTR_ERR(fmtdesc); + fmt->fmt.pix.pixelformat = fmtdesc->fcc; } fmt->fmt.pix.field = V4L2_FIELD_NONE; fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; @@ -389,6 +396,9 @@ uvc_v4l2_enum_format(struct file *file, void *fh, struct v4l2_fmtdesc *f) return -EINVAL;
fmtdesc = to_uvc_format(uformat); + if (IS_ERR(fmtdesc)) + return PTR_ERR(fmtdesc); + f->pixelformat = fmtdesc->fcc;
return 0;
From: Ken Raeburn raeburn@redhat.com
[ Upstream commit 0808ebf2f80b962e75741a41ced372a7116f1e26 ]
Clear the dedupe_context pointer in a data_vio whenever ownership of the context is lost, so that vdo can't examine it accidentally.
Signed-off-by: Ken Raeburn raeburn@redhat.com Signed-off-by: Matthew Sakai msakai@redhat.com Signed-off-by: Mikulas Patocka mpatocka@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/md/dm-vdo/dedupe.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index 39ac68614419f..80628ae93fbac 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -729,6 +729,7 @@ static void process_update_result(struct data_vio *agent) !change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) return;
+ agent->dedupe_context = NULL; release_context(context); }
@@ -1648,6 +1649,7 @@ static void process_query_result(struct data_vio *agent)
if (change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) { agent->is_duplicate = decode_uds_advice(context); + agent->dedupe_context = NULL; release_context(context); } } @@ -2321,6 +2323,7 @@ static void timeout_index_operations_callback(struct vdo_completion *completion) * send its requestor on its way. */ list_del_init(&context->list_entry); + context->requestor->dedupe_context = NULL; continue_data_vio(context->requestor); timed_out++; }
From: Zijun Hu quic_zijuhu@quicinc.com
[ Upstream commit bfa54a793ba77ef696755b66f3ac4ed00c7d1248 ]
For bus_register(), any error which happens after kset_register() will cause that @priv are freed twice, fixed by setting @priv with NULL after the first free.
Signed-off-by: Zijun Hu quic_zijuhu@quicinc.com Link: https://lore.kernel.org/r/20240727-bus_register_fix-v1-1-fed8dd0dba7a@quicin... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/bus.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c index ffea0728b8b2f..08362ecec0ecb 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -920,6 +920,8 @@ int bus_register(const struct bus_type *bus) bus_remove_file(bus, &bus_attr_uevent); bus_uevent_fail: kset_unregister(&priv->subsys); + /* Above kset_unregister() will kfree @priv */ + priv = NULL; out: kfree(priv); return retval;
From: Zijun Hu quic_zijuhu@quicinc.com
[ Upstream commit c0fd973c108cdc22a384854bc4b3e288a9717bb2 ]
Return -EIO instead of 0 for below erroneous bus attribute operations: - read a bus attribute without show(). - write a bus attribute without store().
Signed-off-by: Zijun Hu quic_zijuhu@quicinc.com Link: https://lore.kernel.org/r/20240724-bus_fix-v2-1-5adbafc698fb@quicinc.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 08362ecec0ecb..6a68734e7ebd1 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -152,7 +152,8 @@ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr, { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); - ssize_t ret = 0; + /* return -EIO for reading a bus attribute without show() */ + ssize_t ret = -EIO;
if (bus_attr->show) ret = bus_attr->show(subsys_priv->bus, buf); @@ -164,7 +165,8 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); - ssize_t ret = 0; + /* return -EIO for writing a bus attribute without store() */ + ssize_t ret = -EIO;
if (bus_attr->store) ret = bus_attr->store(subsys_priv->bus, buf, count);
From: Justin Tee justin.tee@broadcom.com
[ Upstream commit 93bcc5f3984bf4f51da1529700aec351872dbfff ]
During HBA stress testing, a spam of received PLOGIs exposes a resource recovery bug causing leakage of lpfc_sqlq entries from the global phba->sli4_hba.lpfc_els_sgl_list.
The issue is in lpfc_els_flush_cmd(), where the driver attempts to recover outstanding ELS sgls when walking the txcmplq. Only CMD_ELS_REQUEST64_CRs and CMD_GEN_REQUEST64_CRs are added to the abort and cancel lists. A check for CMD_XMIT_ELS_RSP64_WQE is missing in order to recover LS_ACC usages of the phba->sli4_hba.lpfc_els_sgl_list too.
Fix by adding CMD_XMIT_ELS_RSP64_WQE as part of the txcmplq walk when adding WQEs to the abort and cancel list in lpfc_els_flush_cmd(). Also, update naming convention from CRs to WQEs.
Signed-off-by: Justin Tee justin.tee@broadcom.com Link: https://lore.kernel.org/r/20240912232447.45607-2-justintee8345@gmail.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/lpfc/lpfc_els.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 929cbfc95163b..c06234ff8064b 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -9641,11 +9641,12 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) if (piocb->cmd_flag & LPFC_DRIVER_ABORTED && !mbx_tmo_err) continue;
- /* On the ELS ring we can have ELS_REQUESTs or - * GEN_REQUESTs waiting for a response. + /* On the ELS ring we can have ELS_REQUESTs, ELS_RSPs, + * or GEN_REQUESTs waiting for a CQE response. */ ulp_command = get_job_cmnd(phba, piocb); - if (ulp_command == CMD_ELS_REQUEST64_CR) { + if (ulp_command == CMD_ELS_REQUEST64_WQE || + ulp_command == CMD_XMIT_ELS_RSP64_WQE) { list_add_tail(&piocb->dlist, &abort_list);
/* If the link is down when flushing ELS commands
From: Justin Tee justin.tee@broadcom.com
[ Upstream commit 0a3c84f71680684c1d41abb92db05f95c09111e8 ]
Deleting an NPIV instance requires all fabric ndlps to be released before an NPIV's resources can be torn down. Failure to release fabric ndlps beforehand opens kref imbalance race conditions. Fix by forcing the DA_ID to complete synchronously with usage of wait_queue.
Signed-off-by: Justin Tee justin.tee@broadcom.com Link: https://lore.kernel.org/r/20240912232447.45607-6-justintee8345@gmail.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/lpfc/lpfc_ct.c | 12 ++++++++++ drivers/scsi/lpfc/lpfc_disc.h | 7 ++++++ drivers/scsi/lpfc/lpfc_vport.c | 43 ++++++++++++++++++++++++++++------ 3 files changed, 55 insertions(+), 7 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 2dedd1493e5ba..1e5db489a00c5 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1647,6 +1647,18 @@ lpfc_cmpl_ct(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, }
out: + /* If the caller wanted a synchronous DA_ID completion, signal the + * wait obj and clear flag to reset the vport. + */ + if (ndlp->save_flags & NLP_WAIT_FOR_DA_ID) { + if (ndlp->da_id_waitq) + wake_up(ndlp->da_id_waitq); + } + + spin_lock_irq(&ndlp->lock); + ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID; + spin_unlock_irq(&ndlp->lock); + lpfc_ct_free_iocb(phba, cmdiocb); lpfc_nlp_put(ndlp); return; diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index f82615d87c4bb..f5ae8cc158205 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -90,6 +90,8 @@ enum lpfc_nlp_save_flags { NLP_IN_RECOV_POST_DEV_LOSS = 0x1, /* wait for outstanding LOGO to cmpl */ NLP_WAIT_FOR_LOGO = 0x2, + /* wait for outstanding DA_ID to finish */ + NLP_WAIT_FOR_DA_ID = 0x4 };
struct lpfc_nodelist { @@ -159,7 +161,12 @@ struct lpfc_nodelist { uint32_t nvme_fb_size; /* NVME target's supported byte cnt */ #define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. */ uint32_t nlp_defer_did; + + /* These wait objects are NPIV specific. These IOs must complete + * synchronously. + */ wait_queue_head_t *logo_waitq; + wait_queue_head_t *da_id_waitq; };
struct lpfc_node_rrq { diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 4439167a51882..7a4d4d8e2ad55 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -626,6 +626,7 @@ lpfc_vport_delete(struct fc_vport *fc_vport) struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_hba *phba = vport->phba; int rc; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
if (vport->port_type == LPFC_PHYSICAL_PORT) { lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, @@ -679,21 +680,49 @@ lpfc_vport_delete(struct fc_vport *fc_vport) if (!ndlp) goto skip_logo;
+ /* Send the DA_ID and Fabric LOGO to cleanup the NPIV fabric entries. */ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE && phba->link_state >= LPFC_LINK_UP && phba->fc_topology != LPFC_TOPOLOGY_LOOP) { if (vport->cfg_enable_da_id) { - /* Send DA_ID and wait for a completion. */ + /* Send DA_ID and wait for a completion. This is best + * effort. If the DA_ID fails, likely the fabric will + * "leak" NportIDs but at least the driver issued the + * command. + */ + ndlp = lpfc_findnode_did(vport, NameServer_DID); + if (!ndlp) + goto issue_logo; + + spin_lock_irq(&ndlp->lock); + ndlp->da_id_waitq = &waitq; + ndlp->save_flags |= NLP_WAIT_FOR_DA_ID; + spin_unlock_irq(&ndlp->lock); + rc = lpfc_ns_cmd(vport, SLI_CTNS_DA_ID, 0, 0); - if (rc) { - lpfc_printf_log(vport->phba, KERN_WARNING, - LOG_VPORT, - "1829 CT command failed to " - "delete objects on fabric, " - "rc %d\n", rc); + if (!rc) { + wait_event_timeout(waitq, + !(ndlp->save_flags & NLP_WAIT_FOR_DA_ID), + msecs_to_jiffies(phba->fc_ratov * 2000)); } + + lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT | LOG_ELS, + "1829 DA_ID issue status %d. " + "SFlag x%x NState x%x, NFlag x%x " + "Rpi x%x\n", + rc, ndlp->save_flags, ndlp->nlp_state, + ndlp->nlp_flag, ndlp->nlp_rpi); + + /* Remove the waitq and save_flags. It no + * longer matters if the wake happened. + */ + spin_lock_irq(&ndlp->lock); + ndlp->da_id_waitq = NULL; + ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID; + spin_unlock_irq(&ndlp->lock); }
+issue_logo: /* * If the vpi is not registered, then a valid FDISC doesn't * exist and there is no need for a ELS LOGO. Just cleanup
From: Justin Tee justin.tee@broadcom.com
[ Upstream commit 1af9af1f8ab38f1285b27581a5e6920ec58296ba ]
Revise certain log messages marked as KERN_ERR LOG_TRACE_EVENT to KERN_WARNING and use the lpfc_vlog_msg() macro to still log the event. The benefit is that events of interest are still logged and the entire trace buffer is not dumped with extraneous logging information when using default lpfc_log_verbose driver parameter settings.
Also, delete the keyword "fail" from such log messages as they aren't really causes for concern. The log messages are more for warnings to a SAN admin about SAN activity.
Signed-off-by: Justin Tee justin.tee@broadcom.com Link: https://lore.kernel.org/r/20240912232447.45607-7-justintee8345@gmail.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/lpfc/lpfc_ct.c | 10 +-- drivers/scsi/lpfc/lpfc_els.c | 125 +++++++++++++++++------------------ 2 files changed, 64 insertions(+), 71 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 1e5db489a00c5..134bc96dd1340 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1572,8 +1572,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } } } else - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "3065 GFT_ID failed x%08x\n", ulp_status); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_DISCOVERY, + "3065 GFT_ID status x%08x\n", ulp_status);
out: lpfc_ct_free_iocb(phba, cmdiocb); @@ -2258,7 +2258,7 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, }
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "0229 FDMI cmd %04x failed, latt = %d " + "0229 FDMI cmd %04x latt = %d " "ulp_status: x%x, rid x%x\n", be16_to_cpu(fdmi_cmd), latt, ulp_status, ulp_word4); @@ -2275,9 +2275,9 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check for a CT LS_RJT response */ cmd = be16_to_cpu(fdmi_cmd); if (be16_to_cpu(fdmi_rsp) == SLI_CT_RESPONSE_FS_RJT) { - /* FDMI rsp failed */ + /* Log FDMI reject */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_ELS, - "0220 FDMI cmd failed FS_RJT Data: x%x", cmd); + "0220 FDMI cmd FS_RJT Data: x%x", cmd);
/* Should we fallback to FDMI-2 / FDMI-1 ? */ switch (cmd) { diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index c06234ff8064b..8c1505e5394fd 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -979,7 +979,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, phba->fcoe_cvl_eventtag_attn = phba->fcoe_cvl_eventtag; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP | LOG_ELS, - "2611 FLOGI failed on FCF (x%x), " + "2611 FLOGI FCF (x%x), " "status:x%x/x%x, tmo:x%x, perform " "roundrobin FCF failover\n", phba->fcf.current_rec.fcf_indx, @@ -997,11 +997,11 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (!(ulp_status == IOSTAT_LOCAL_REJECT && ((ulp_word4 & IOERR_PARAM_MASK) == IOERR_LOOP_OPEN_FAILURE))) - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2858 FLOGI failure Status:x%x/x%x TMO" - ":x%x Data x%lx x%x\n", - ulp_status, ulp_word4, tmo, - phba->hba_flag, phba->fcf.fcf_flag); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2858 FLOGI Status:x%x/x%x TMO" + ":x%x Data x%lx x%x\n", + ulp_status, ulp_word4, tmo, + phba->hba_flag, phba->fcf.fcf_flag);
/* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { @@ -1023,7 +1023,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_nlp_put(ndlp);
lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, - "0150 FLOGI failure Status:x%x/x%x " + "0150 FLOGI Status:x%x/x%x " "xri x%x TMO:x%x refcnt %d\n", ulp_status, ulp_word4, cmdiocb->sli4_xritag, tmo, kref_read(&ndlp->kref)); @@ -1032,11 +1032,11 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (!(ulp_status == IOSTAT_LOCAL_REJECT && ((ulp_word4 & IOERR_PARAM_MASK) == IOERR_LOOP_OPEN_FAILURE))) { - /* FLOGI failure */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "0100 FLOGI failure Status:x%x/x%x " - "TMO:x%x\n", - ulp_status, ulp_word4, tmo); + /* Warn FLOGI status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0100 FLOGI Status:x%x/x%x " + "TMO:x%x\n", + ulp_status, ulp_word4, tmo); goto flogifail; }
@@ -1958,16 +1958,16 @@ lpfc_cmpl_els_rrq(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
if (ulp_status) { /* Check for retry */ - /* RRQ failed Don't print the vport to vport rjts */ + /* Warn RRQ status Don't print the vport to vport rjts */ if (ulp_status != IOSTAT_LS_RJT || (((ulp_word4) >> 16 != LSRJT_INVALID_CMD) && ((ulp_word4) >> 16 != LSRJT_UNABLE_TPC)) || (phba)->pport->cfg_log_verbose & LOG_ELS) - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2881 RRQ failure DID:%06X Status:" - "x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2881 RRQ DID:%06X Status:" + "x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4); }
lpfc_clr_rrq_active(phba, rrq->xritag, rrq); @@ -2071,16 +2071,16 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } goto out; } - /* PLOGI failed Don't print the vport to vport rjts */ + /* Warn PLOGI status Don't print the vport to vport rjts */ if (ulp_status != IOSTAT_LS_RJT || (((ulp_word4) >> 16 != LSRJT_INVALID_CMD) && ((ulp_word4) >> 16 != LSRJT_UNABLE_TPC)) || (phba)->pport->cfg_log_verbose & LOG_ELS) - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2753 PLOGI failure DID:%06X " - "Status:x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2753 PLOGI DID:%06X " + "Status:x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4);
/* Do not call DSM for lpfc_els_abort'ed ELS cmds */ if (!lpfc_error_lost_link(vport, ulp_status, ulp_word4)) @@ -2317,7 +2317,6 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_vport *vport = cmdiocb->vport; struct lpfc_nodelist *ndlp; char *mode; - u32 loglevel; u32 ulp_status; u32 ulp_word4; bool release_node = false; @@ -2366,17 +2365,14 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * could be expected. */ if (test_bit(FC_FABRIC, &vport->fc_flag) || - vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH) { - mode = KERN_ERR; - loglevel = LOG_TRACE_EVENT; - } else { + vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH) + mode = KERN_WARNING; + else mode = KERN_INFO; - loglevel = LOG_ELS; - }
- /* PRLI failed */ - lpfc_printf_vlog(vport, mode, loglevel, - "2754 PRLI failure DID:%06X Status:x%x/x%x, " + /* Warn PRLI status */ + lpfc_printf_vlog(vport, mode, LOG_ELS, + "2754 PRLI DID:%06X Status:x%x/x%x, " "data: x%x x%x x%x\n", ndlp->nlp_DID, ulp_status, ulp_word4, ndlp->nlp_state, @@ -2848,11 +2844,11 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } goto out; } - /* ADISC failed */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2755 ADISC failure DID:%06X Status:x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + /* Warn ADISC status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2755 ADISC DID:%06X Status:x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4); lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_ADISC);
@@ -3039,12 +3035,12 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * discovery. The PLOGI will retry. */ if (ulp_status) { - /* LOGO failed */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "2756 LOGO failure, No Retry DID:%06X " - "Status:x%x/x%x\n", - ndlp->nlp_DID, ulp_status, - ulp_word4); + /* Warn LOGO status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "2756 LOGO, No Retry DID:%06X " + "Status:x%x/x%x\n", + ndlp->nlp_DID, ulp_status, + ulp_word4);
if (lpfc_error_lost_link(vport, ulp_status, ulp_word4)) skip_recovery = 1; @@ -4831,11 +4827,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && (cmd == ELS_CMD_FDISC) && (stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){ - lpfc_printf_vlog(vport, KERN_ERR, - LOG_TRACE_EVENT, - "0125 FDISC Failed (x%x). " - "Fabric out of resources\n", - stat.un.lsRjtError); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0125 FDISC (x%x). " + "Fabric out of resources\n", + stat.un.lsRjtError); lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_RSCS); } @@ -4871,11 +4866,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, LSEXP_NOTHING_MORE) { vport->fc_sparam.cmn.bbRcvSizeMsb &= 0xf; retry = 1; - lpfc_printf_vlog(vport, KERN_ERR, - LOG_TRACE_EVENT, - "0820 FLOGI Failed (x%x). " - "BBCredit Not Supported\n", - stat.un.lsRjtError); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0820 FLOGI (x%x). " + "BBCredit Not Supported\n", + stat.un.lsRjtError); } break;
@@ -4885,11 +4879,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ((stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_PNAME) || (stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_NPORT_ID)) ) { - lpfc_printf_vlog(vport, KERN_ERR, - LOG_TRACE_EVENT, - "0122 FDISC Failed (x%x). " - "Fabric Detected Bad WWN\n", - stat.un.lsRjtError); + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0122 FDISC (x%x). " + "Fabric Detected Bad WWN\n", + stat.un.lsRjtError); lpfc_vport_set_state(vport, FC_VPORT_FABRIC_REJ_WWN); } @@ -5344,8 +5337,8 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, u32 ulp_status, ulp_word4, tmo, did, iotag;
if (!vport) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "3177 ELS response failed\n"); + lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, + "3177 null vport in ELS rsp\n"); goto out; } if (cmdiocb->context_un.mbox) @@ -11311,10 +11304,10 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) goto out; - /* FDISC failed */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "0126 FDISC failed. (x%x/x%x)\n", - ulp_status, ulp_word4); + /* Warn FDISC status */ + lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, + "0126 FDISC cmpl status: x%x/x%x)\n", + ulp_status, ulp_word4); goto fdisc_failed; }
From: José Roberto de Souza jose.souza@intel.com
[ Upstream commit 6c10ba06bb1b48acce6d4d9c1e33beb9954f1788 ]
By default xe_bb_create_job() appends a MI_BATCH_BUFFER_END to batch buffer, this is not a problem if batch buffer is only used once but oa reuses the batch buffer for the same metric and at each call it appends a MI_BATCH_BUFFER_END, printing the warning below and then overflowing.
[ 381.072016] ------------[ cut here ]------------ [ 381.072019] xe 0000:00:02.0: [drm] Assertion `bb->len * 4 + bb_prefetch(q->gt) <= size` failed! platform: LUNARLAKE subplatform: 1 graphics: Xe2_LPG / Xe2_HPG 20.04 step B0 media: Xe2_LPM / Xe2_HPM 20.00 step B0 tile: 0 VRAM 0 B GT: 0 type 1
So here checking if batch buffer already have MI_BATCH_BUFFER_END if not append it.
v2: - simply fix, suggestion from Ashutosh
Cc: Ashutosh Dixit ashutosh.dixit@intel.com Signed-off-by: José Roberto de Souza jose.souza@intel.com Reviewed-by: Ashutosh Dixit ashutosh.dixit@intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20240912153842.35813-1-jose.so... (cherry picked from commit 9ba0e0f30ca42a98af3689460063edfb6315718a) Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/xe/xe_bb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index a13e0b3a169ed..ef777dbdf4ecc 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -65,7 +65,8 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { u32 size = drm_suballoc_size(bb->bo);
- bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END) + bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
From: Christian König christian.koenig@amd.com
[ Upstream commit 7181faaa4703705939580abffaf9cb5d6b50dbb7 ]
This was only used as workaround for recovering the page tables after VRAM was lost and is no longer necessary after the function amdgpu_vm_bo_reset_state_machine() started to do the same.
Compute never used shadows either, so the only proplematic case left is SVM and that is most likely not recoverable in any way when VRAM is lost.
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Lijo Lazar lijo.lazar@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 87 +-------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 67 +--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 21 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 56 +------------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 19 +---- 7 files changed, 6 insertions(+), 265 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 137a88b8de453..a1b2bf3db55b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1091,10 +1091,6 @@ struct amdgpu_device {
struct amdgpu_virt virt;
- /* link all shadow bo */ - struct list_head shadow_list; - struct mutex shadow_list_lock; - /* record hw reset is performed */ bool has_hw_reset; u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bcacf2e35eba0..dfd468729d52b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4073,9 +4073,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->mm_stats.lock); spin_lock_init(&adev->wb.lock);
- INIT_LIST_HEAD(&adev->shadow_list); - mutex_init(&adev->shadow_list_lock); - INIT_LIST_HEAD(&adev->reset_list);
INIT_LIST_HEAD(&adev->ras_list); @@ -4980,80 +4977,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) return 0; }
-/** - * amdgpu_device_recover_vram - Recover some VRAM contents - * - * @adev: amdgpu_device pointer - * - * Restores the contents of VRAM buffers from the shadows in GTT. Used to - * restore things like GPUVM page tables after a GPU reset where - * the contents of VRAM might be lost. - * - * Returns: - * 0 on success, negative error code on failure. - */ -static int amdgpu_device_recover_vram(struct amdgpu_device *adev) -{ - struct dma_fence *fence = NULL, *next = NULL; - struct amdgpu_bo *shadow; - struct amdgpu_bo_vm *vmbo; - long r = 1, tmo; - - if (amdgpu_sriov_runtime(adev)) - tmo = msecs_to_jiffies(8000); - else - tmo = msecs_to_jiffies(100); - - dev_info(adev->dev, "recover vram bo from shadow start\n"); - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { - /* If vm is compute context or adev is APU, shadow will be NULL */ - if (!vmbo->shadow) - continue; - shadow = vmbo->shadow; - - /* No need to recover an evicted BO */ - if (!shadow->tbo.resource || - shadow->tbo.resource->mem_type != TTM_PL_TT || - shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || - shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) - continue; - - r = amdgpu_bo_restore_shadow(shadow, &next); - if (r) - break; - - if (fence) { - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - fence = next; - if (tmo == 0) { - r = -ETIMEDOUT; - break; - } else if (tmo < 0) { - r = tmo; - break; - } - } else { - fence = next; - } - } - mutex_unlock(&adev->shadow_list_lock); - - if (fence) - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - - if (r < 0 || tmo <= 0) { - dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); - return -EIO; - } - - dev_info(adev->dev, "recover vram bo from shadow done\n"); - return 0; -} - - /** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * @@ -5116,12 +5039,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r;
- if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) amdgpu_inc_vram_lost(adev); - r = amdgpu_device_recover_vram(adev); - } - if (r) - return r;
/* need to be called during full access so we can't do it later like * bare-metal does. @@ -5541,9 +5460,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, } }
- if (!r) - r = amdgpu_device_recover_vram(tmp_adev); - else + if (r) tmp_adev->asic_reset_res = r; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e32161f6b67a3..a987f671b1d53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -77,24 +77,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_destroy(tbo); }
-static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo; - struct amdgpu_bo_vm *vmbo; - - bo = shadow_bo->parent; - vmbo = to_amdgpu_bo_vm(bo); - /* in case amdgpu_device_recover_vram got NULL of bo->parent */ - if (!list_empty(&vmbo->shadow_list)) { - mutex_lock(&adev->shadow_list_lock); - list_del_init(&vmbo->shadow_list); - mutex_unlock(&adev->shadow_list_lock); - } - - amdgpu_bo_destroy(tbo); -} - /** * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo * @bo: buffer object to be checked @@ -108,8 +90,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { if (bo->destroy == &amdgpu_bo_destroy || - bo->destroy == &amdgpu_bo_user_destroy || - bo->destroy == &amdgpu_bo_vm_destroy) + bo->destroy == &amdgpu_bo_user_destroy) return true;
return false; @@ -722,52 +703,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, return r; }
-/** - * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list - * - * @vmbo: BO that will be inserted into the shadow list - * - * Insert a BO to the shadow list. - */ -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev); - - mutex_lock(&adev->shadow_list_lock); - list_add_tail(&vmbo->shadow_list, &adev->shadow_list); - vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo); - vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy; - mutex_unlock(&adev->shadow_list_lock); -} - -/** - * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow - * - * @shadow: &amdgpu_bo shadow to be restored - * @fence: dma_fence associated with the operation - * - * Copies a buffer object's shadow content back to the object. - * This is used for recovering a buffer from its shadow in case of a gpu - * reset where vram context may be lost. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) - -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - uint64_t shadow_addr, parent_addr; - - shadow_addr = amdgpu_bo_gpu_offset(shadow); - parent_addr = amdgpu_bo_gpu_offset(shadow->parent); - - return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, - amdgpu_bo_size(shadow), NULL, fence, - true, false, 0); -} - /** * amdgpu_bo_kmap - map an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be mapped diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bc42ccbde659a..a4fa1f296daec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -130,8 +130,6 @@ struct amdgpu_bo_user {
struct amdgpu_bo_vm { struct amdgpu_bo bo; - struct amdgpu_bo *shadow; - struct list_head shadow_list; struct amdgpu_vm_bo_base entries[]; };
@@ -269,22 +267,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; }
-/** - * amdgpu_bo_shadowed - check if the BO is shadowed - * - * @bo: BO to be tested. - * - * Returns: - * NULL if not shadowed or else return a BO pointer. - */ -static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo) -{ - if (bo->tbo.type == ttm_bo_type_kernel) - return to_amdgpu_bo_vm(bo)->shadow; - - return NULL; -} - bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
@@ -343,9 +325,6 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats); -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, - struct dma_fence **fence); uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a060c28f0877c..8cda1d02dade3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -465,7 +465,6 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, { uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; - struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r;
@@ -486,16 +485,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock);
bo = bo_base->bo; - shadow = amdgpu_bo_shadowed(bo);
r = validate(param, bo); if (r) return r; - if (shadow) { - r = validate(param, shadow); - if (r) - return r; - }
if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -2123,10 +2116,6 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, { struct amdgpu_vm_bo_base *bo_base;
- /* shadow bo doesn't have bo base, its validation needs its parent */ - if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo)) - bo = bo->parent; - for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm;
@@ -2454,7 +2443,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); if (r) { - amdgpu_bo_unref(&root->shadow); amdgpu_bo_unref(&root_bo); goto error_free_delayed; } @@ -2546,11 +2534,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true;
- /* Free the shadow bo for compute VM */ - amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); - - goto unreserve_bo; - unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index e39d6e7643bfb..c8e0b8cfd3363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -383,14 +383,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r;
- if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV;
@@ -448,10 +440,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; unsigned int num_entries; - int r;
memset(&bp, 0, sizeof(bp));
@@ -484,42 +473,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (vm->root.bo) bp.resv = vm->root.bo->tbo.base.resv;
- r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = &(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_pt_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - bp.xcp_id_plus1 = xcp_id + 1; - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - amdgpu_bo_add_to_shadow_list(*vmbo); - - return 0; + return amdgpu_bo_create_vm(adev, &bp, vmbo); }
/** @@ -569,7 +523,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, return 0;
error_free_pt: - amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt_bo); return r; } @@ -581,17 +534,10 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, */ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) { - struct amdgpu_bo *shadow; - if (!entry->bo) return;
entry->bo->vm_bo = NULL; - shadow = amdgpu_bo_shadowed(entry->bo); - if (shadow) { - ttm_bo_set_bulk_move(&shadow->tbo, NULL); - amdgpu_bo_unref(&shadow); - } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
spin_lock(&entry->vm->status_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 9b748d7058b5c..390432a22ddd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -35,16 +35,7 @@ */ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) { - int r; - - r = amdgpu_ttm_alloc_gart(&table->bo.tbo); - if (r) - return r; - - if (table->shadow) - r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); - - return r; + return amdgpu_ttm_alloc_gart(&table->bo.tbo); }
/* Allocate a new job for @count PTE updates */ @@ -273,17 +264,13 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
if (!p->pages_addr) { /* set page commands needed */ - if (vmbo->shadow) - amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr, - count, incr, flags); amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, incr, flags); return 0; }
/* copy commands needed */ - ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * - (vmbo->shadow ? 2 : 1); + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
/* for padding */ ndw -= 7; @@ -298,8 +285,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, pte[i] |= flags; }
- if (vmbo->shadow) - amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes); amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
pe += nptes * 8;
Hi Sasha,
Am 04.10.24 um 20:17 schrieb Sasha Levin:
From: Christian König christian.koenig@amd.com
[ Upstream commit 7181faaa4703705939580abffaf9cb5d6b50dbb7 ]
This was only used as workaround for recovering the page tables after VRAM was lost and is no longer necessary after the function amdgpu_vm_bo_reset_state_machine() started to do the same.
Compute never used shadows either, so the only proplematic case left is SVM and that is most likely not recoverable in any way when VRAM is lost.
why is that backported to older kernels? It's basically just removing an old and now unused feature.
Lijo pointed out a related bug fixed by removing the feature, but that only happens extremely rarely and in my view doesn't really justify a backport.
Regards, Christian.
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Lijo Lazar lijo.lazar@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 87 +-------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 67 +--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 21 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 56 +------------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 19 +---- 7 files changed, 6 insertions(+), 265 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 137a88b8de453..a1b2bf3db55b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1091,10 +1091,6 @@ struct amdgpu_device { struct amdgpu_virt virt;
- /* link all shadow bo */
- struct list_head shadow_list;
- struct mutex shadow_list_lock;
- /* record hw reset is performed */ bool has_hw_reset; u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bcacf2e35eba0..dfd468729d52b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4073,9 +4073,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->mm_stats.lock); spin_lock_init(&adev->wb.lock);
- INIT_LIST_HEAD(&adev->shadow_list);
- mutex_init(&adev->shadow_list_lock);
- INIT_LIST_HEAD(&adev->reset_list);
INIT_LIST_HEAD(&adev->ras_list); @@ -4980,80 +4977,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) return 0; } -/**
- amdgpu_device_recover_vram - Recover some VRAM contents
- @adev: amdgpu_device pointer
- Restores the contents of VRAM buffers from the shadows in GTT. Used to
- restore things like GPUVM page tables after a GPU reset where
- the contents of VRAM might be lost.
- Returns:
- 0 on success, negative error code on failure.
- */
-static int amdgpu_device_recover_vram(struct amdgpu_device *adev) -{
- struct dma_fence *fence = NULL, *next = NULL;
- struct amdgpu_bo *shadow;
- struct amdgpu_bo_vm *vmbo;
- long r = 1, tmo;
- if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(8000);
- else
tmo = msecs_to_jiffies(100);
- dev_info(adev->dev, "recover vram bo from shadow start\n");
- mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
/* If vm is compute context or adev is APU, shadow will be NULL */
if (!vmbo->shadow)
continue;
shadow = vmbo->shadow;
/* No need to recover an evicted BO */
if (!shadow->tbo.resource ||
shadow->tbo.resource->mem_type != TTM_PL_TT ||
shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
continue;
r = amdgpu_bo_restore_shadow(shadow, &next);
if (r)
break;
if (fence) {
tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
fence = next;
if (tmo == 0) {
r = -ETIMEDOUT;
break;
} else if (tmo < 0) {
r = tmo;
break;
}
} else {
fence = next;
}
- }
- mutex_unlock(&adev->shadow_list_lock);
- if (fence)
tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
- if (r < 0 || tmo <= 0) {
dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
return -EIO;
- }
- dev_info(adev->dev, "recover vram bo from shadow done\n");
- return 0;
-}
- /**
- amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
@@ -5116,12 +5039,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r;
- if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
- if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) amdgpu_inc_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
- }
- if (r)
return r;
/* need to be called during full access so we can't do it later like * bare-metal does. @@ -5541,9 +5460,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, } }
if (!r)
r = amdgpu_device_recover_vram(tmp_adev);
else
}if (r) tmp_adev->asic_reset_res = r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e32161f6b67a3..a987f671b1d53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -77,24 +77,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_destroy(tbo); } -static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) -{
- struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
- struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
- struct amdgpu_bo_vm *vmbo;
- bo = shadow_bo->parent;
- vmbo = to_amdgpu_bo_vm(bo);
- /* in case amdgpu_device_recover_vram got NULL of bo->parent */
- if (!list_empty(&vmbo->shadow_list)) {
mutex_lock(&adev->shadow_list_lock);
list_del_init(&vmbo->shadow_list);
mutex_unlock(&adev->shadow_list_lock);
- }
- amdgpu_bo_destroy(tbo);
-}
- /**
- amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
- @bo: buffer object to be checked
@@ -108,8 +90,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { if (bo->destroy == &amdgpu_bo_destroy ||
bo->destroy == &amdgpu_bo_user_destroy ||
bo->destroy == &amdgpu_bo_vm_destroy)
return true;bo->destroy == &amdgpu_bo_user_destroy)
return false; @@ -722,52 +703,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, return r; } -/**
- amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
- @vmbo: BO that will be inserted into the shadow list
- Insert a BO to the shadow list.
- */
-void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) -{
- struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev);
- mutex_lock(&adev->shadow_list_lock);
- list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
- vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
- vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
- mutex_unlock(&adev->shadow_list_lock);
-}
-/**
- amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
- @shadow: &amdgpu_bo shadow to be restored
- @fence: dma_fence associated with the operation
- Copies a buffer object's shadow content back to the object.
- This is used for recovering a buffer from its shadow in case of a gpu
- reset where vram context may be lost.
- Returns:
- 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t shadow_addr, parent_addr;
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
- parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
- return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
amdgpu_bo_size(shadow), NULL, fence,
true, false, 0);
-}
- /**
- amdgpu_bo_kmap - map an &amdgpu_bo buffer object
- @bo: &amdgpu_bo buffer object to be mapped
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bc42ccbde659a..a4fa1f296daec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -130,8 +130,6 @@ struct amdgpu_bo_user { struct amdgpu_bo_vm { struct amdgpu_bo bo;
- struct amdgpu_bo *shadow;
- struct list_head shadow_list; struct amdgpu_vm_bo_base entries[]; };
@@ -269,22 +267,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; } -/**
- amdgpu_bo_shadowed - check if the BO is shadowed
- @bo: BO to be tested.
- Returns:
- NULL if not shadowed or else return a BO pointer.
- */
-static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo) -{
- if (bo->tbo.type == ttm_bo_type_kernel)
return to_amdgpu_bo_vm(bo)->shadow;
- return NULL;
-}
- bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
@@ -343,9 +325,6 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats); -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain);struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a060c28f0877c..8cda1d02dade3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -465,7 +465,6 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, { uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base;
- struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r;
@@ -486,16 +485,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock); bo = bo_base->bo;
shadow = amdgpu_bo_shadowed(bo);
r = validate(param, bo); if (r) return r;
if (shadow) {
r = validate(param, shadow);
if (r)
return r;
}
if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -2123,10 +2116,6 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, { struct amdgpu_vm_bo_base *bo_base;
- /* shadow bo doesn't have bo base, its validation needs its parent */
- if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo))
bo = bo->parent;
- for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm;
@@ -2454,7 +2443,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); if (r) {
amdgpu_bo_unref(&root_bo); goto error_free_delayed; }amdgpu_bo_unref(&root->shadow);
@@ -2546,11 +2534,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true;
- /* Free the shadow bo for compute VM */
- amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
- goto unreserve_bo;
- unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index e39d6e7643bfb..c8e0b8cfd3363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -383,14 +383,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r;
- if (vmbo->shadow) {
struct amdgpu_bo *shadow = vmbo->shadow;
r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
if (r)
return r;
- }
- if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV;
@@ -448,10 +440,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { struct amdgpu_bo_param bp;
- struct amdgpu_bo *bo;
- struct dma_resv *resv; unsigned int num_entries;
- int r;
memset(&bp, 0, sizeof(bp)); @@ -484,42 +473,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (vm->root.bo) bp.resv = vm->root.bo->tbo.base.resv;
- r = amdgpu_bo_create_vm(adev, &bp, vmbo);
- if (r)
return r;
- bo = &(*vmbo)->bo;
- if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
(*vmbo)->shadow = NULL;
return 0;
- }
- if (!bp.resv)
WARN_ON(dma_resv_lock(bo->tbo.base.resv,
NULL));
- resv = bp.resv;
- memset(&bp, 0, sizeof(bp));
- bp.size = amdgpu_vm_pt_size(adev, level);
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- bp.type = ttm_bo_type_kernel;
- bp.resv = bo->tbo.base.resv;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
- bp.xcp_id_plus1 = xcp_id + 1;
- r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
- if (!resv)
dma_resv_unlock(bo->tbo.base.resv);
- if (r) {
amdgpu_bo_unref(&bo);
return r;
- }
- amdgpu_bo_add_to_shadow_list(*vmbo);
- return 0;
- return amdgpu_bo_create_vm(adev, &bp, vmbo); }
/** @@ -569,7 +523,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, return 0; error_free_pt:
- amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt_bo); return r; }
@@ -581,17 +534,10 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, */ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) {
- struct amdgpu_bo *shadow;
- if (!entry->bo) return;
entry->bo->vm_bo = NULL;
- shadow = amdgpu_bo_shadowed(entry->bo);
- if (shadow) {
ttm_bo_set_bulk_move(&shadow->tbo, NULL);
amdgpu_bo_unref(&shadow);
- } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
spin_lock(&entry->vm->status_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 9b748d7058b5c..390432a22ddd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -35,16 +35,7 @@ */ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) {
- int r;
- r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
- if (r)
return r;
- if (table->shadow)
r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
- return r;
- return amdgpu_ttm_alloc_gart(&table->bo.tbo); }
/* Allocate a new job for @count PTE updates */ @@ -273,17 +264,13 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (!p->pages_addr) { /* set page commands needed */
if (vmbo->shadow)
amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
}count, incr, flags); amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, incr, flags); return 0;
/* copy commands needed */
ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
(vmbo->shadow ? 2 : 1);
ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
/* for padding */ ndw -= 7; @@ -298,8 +285,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, pte[i] |= flags; }
if (vmbo->shadow)
amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
pe += nptes * 8;
On Tue, Oct 08, 2024 at 08:46:01AM +0200, Christian König wrote:
Hi Sasha,
Am 04.10.24 um 20:17 schrieb Sasha Levin:
From: Christian König christian.koenig@amd.com
[ Upstream commit 7181faaa4703705939580abffaf9cb5d6b50dbb7 ]
This was only used as workaround for recovering the page tables after VRAM was lost and is no longer necessary after the function amdgpu_vm_bo_reset_state_machine() started to do the same.
Compute never used shadows either, so the only proplematic case left is SVM and that is most likely not recoverable in any way when VRAM is lost.
why is that backported to older kernels? It's basically just removing an old and now unused feature.
Lijo pointed out a related bug fixed by removing the feature, but that only happens extremely rarely and in my view doesn't really justify a backport.
I'll drop it then, thanks!
From: Alex Hung alex.hung@amd.com
[ Upstream commit ff599ef6970ee000fa5bc38d02fa5ff5f3fc7575 ]
[WHAT & HOW] se is null checked previously in the same function, indicating it might be null; therefore, it must be checked when used again.
This fixes 1 FORWARD_NULL issue reported by Coverity.
Acked-by: Alex Hung alex.hung@amd.com Reviewed-by: Rodrigo Siqueira rodrigo.siqueira@amd.com Signed-off-by: Alex Hung alex.hung@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 85a2ef82afa53..82390814e6c6a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1832,7 +1832,7 @@ bool dc_validate_boot_timing(const struct dc *dc, if (crtc_timing->pix_clk_100hz != pix_clk_100hz) return false;
- if (!se->funcs->dp_get_pixel_format) + if (!se || !se->funcs->dp_get_pixel_format) return false;
if (!se->funcs->dp_get_pixel_format(
From: Qianqiang Liu qianqiang.liu@163.com
[ Upstream commit 5b97eebcce1b4f3f07a71f635d6aa3af96c236e7 ]
syzbot has found a NULL pointer dereference bug in fbcon. Here is the simplified C reproducer:
struct param { uint8_t type; struct tiocl_selection ts; };
int main() { struct fb_con2fbmap con2fb; struct param param;
int fd = open("/dev/fb1", 0, 0);
con2fb.console = 0x19; con2fb.framebuffer = 0; ioctl(fd, FBIOPUT_CON2FBMAP, &con2fb);
param.type = 2; param.ts.xs = 0; param.ts.ys = 0; param.ts.xe = 0; param.ts.ye = 0; param.ts.sel_mode = 0;
int fd1 = open("/dev/tty1", O_RDWR, 0); ioctl(fd1, TIOCLINUX, ¶m);
con2fb.console = 1; con2fb.framebuffer = 0; ioctl(fd, FBIOPUT_CON2FBMAP, &con2fb);
return 0; }
After calling ioctl(fd1, TIOCLINUX, ¶m), the subsequent ioctl(fd, FBIOPUT_CON2FBMAP, &con2fb) causes the kernel to follow a different execution path:
set_con2fb_map -> con2fb_init_display -> fbcon_set_disp -> redraw_screen -> hide_cursor -> clear_selection -> highlight -> invert_screen -> do_update_region -> fbcon_putcs -> ops->putcs
Since ops->putcs is a NULL pointer, this leads to a kernel panic. To prevent this, we need to call set_blitting_type() within set_con2fb_map() to properly initialize ops->putcs.
Reported-by: syzbot+3d613ae53c031502687a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=3d613ae53c031502687a Tested-by: syzbot+3d613ae53c031502687a@syzkaller.appspotmail.com Signed-off-by: Qianqiang Liu qianqiang.liu@163.com Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/video/fbdev/core/fbcon.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 3f7333dca508c..fedd796c9a5cd 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -847,6 +847,8 @@ static int set_con2fb_map(int unit, int newidx, int user) return err;
fbcon_add_cursor_work(info); + } else if (vc) { + set_blitting_type(vc, info); }
con2fb_map[unit] = newidx;
From: Enzo Matsumiya ematsumiya@suse.de
[ Upstream commit b0abcd65ec545701b8793e12bc27dc98042b151a ]
Doing an async decryption (large read) crashes with a slab-use-after-free way down in the crypto API.
Reproducer: # mount.cifs -o ...,seal,esize=1 //srv/share /mnt # dd if=/mnt/largefile of=/dev/null ... [ 194.196391] ================================================================== [ 194.196844] BUG: KASAN: slab-use-after-free in gf128mul_4k_lle+0xc1/0x110 [ 194.197269] Read of size 8 at addr ffff888112bd0448 by task kworker/u77:2/899 [ 194.197707] [ 194.197818] CPU: 12 UID: 0 PID: 899 Comm: kworker/u77:2 Not tainted 6.11.0-lku-00028-gfca3ca14a17a-dirty #43 [ 194.198400] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-prebuilt.qemu.org 04/01/2014 [ 194.199046] Workqueue: smb3decryptd smb2_decrypt_offload [cifs] [ 194.200032] Call Trace: [ 194.200191] <TASK> [ 194.200327] dump_stack_lvl+0x4e/0x70 [ 194.200558] ? gf128mul_4k_lle+0xc1/0x110 [ 194.200809] print_report+0x174/0x505 [ 194.201040] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 194.201352] ? srso_return_thunk+0x5/0x5f [ 194.201604] ? __virt_addr_valid+0xdf/0x1c0 [ 194.201868] ? gf128mul_4k_lle+0xc1/0x110 [ 194.202128] kasan_report+0xc8/0x150 [ 194.202361] ? gf128mul_4k_lle+0xc1/0x110 [ 194.202616] gf128mul_4k_lle+0xc1/0x110 [ 194.202863] ghash_update+0x184/0x210 [ 194.203103] shash_ahash_update+0x184/0x2a0 [ 194.203377] ? __pfx_shash_ahash_update+0x10/0x10 [ 194.203651] ? srso_return_thunk+0x5/0x5f [ 194.203877] ? crypto_gcm_init_common+0x1ba/0x340 [ 194.204142] gcm_hash_assoc_remain_continue+0x10a/0x140 [ 194.204434] crypt_message+0xec1/0x10a0 [cifs] [ 194.206489] ? __pfx_crypt_message+0x10/0x10 [cifs] [ 194.208507] ? srso_return_thunk+0x5/0x5f [ 194.209205] ? srso_return_thunk+0x5/0x5f [ 194.209925] ? srso_return_thunk+0x5/0x5f [ 194.210443] ? srso_return_thunk+0x5/0x5f [ 194.211037] decrypt_raw_data+0x15f/0x250 [cifs] [ 194.212906] ? __pfx_decrypt_raw_data+0x10/0x10 [cifs] [ 194.214670] ? srso_return_thunk+0x5/0x5f [ 194.215193] smb2_decrypt_offload+0x12a/0x6c0 [cifs]
This is because TFM is being used in parallel.
Fix this by allocating a new AEAD TFM for async decryption, but keep the existing one for synchronous READ cases (similar to what is done in smb3_calc_signature()).
Also remove the calls to aead_request_set_callback() and crypto_wait_req() since it's always going to be a synchronous operation.
Signed-off-by: Enzo Matsumiya ematsumiya@suse.de Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/smb/client/smb2ops.c | 47 ++++++++++++++++++++++++----------------- fs/smb/client/smb2pdu.c | 6 ++++++ 2 files changed, 34 insertions(+), 19 deletions(-)
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index e6540072ffb0e..614a7bbdb007d 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4307,7 +4307,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) */ static int crypt_message(struct TCP_Server_Info *server, int num_rqst, - struct smb_rqst *rqst, int enc) + struct smb_rqst *rqst, int enc, struct crypto_aead *tfm) { struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)rqst[0].rq_iov[0].iov_base; @@ -4318,8 +4318,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, u8 key[SMB3_ENC_DEC_KEY_SIZE]; struct aead_request *req; u8 *iv; - DECLARE_CRYPTO_WAIT(wait); - struct crypto_aead *tfm; unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); void *creq; size_t sensitive_size; @@ -4331,14 +4329,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, return rc; }
- rc = smb3_crypto_aead_allocate(server); - if (rc) { - cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__); - return rc; - } - - tfm = enc ? server->secmech.enc : server->secmech.dec; - if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE); @@ -4378,11 +4368,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, aead_request_set_crypt(req, sg, sg, crypt_len, iv); aead_request_set_ad(req, assoc_data_len);
- aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &wait); - - rc = crypto_wait_req(enc ? crypto_aead_encrypt(req) - : crypto_aead_decrypt(req), &wait); + rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
if (!rc && enc) memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); @@ -4488,7 +4474,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, /* fill the 1st iov with a transform header */ fill_transform_hdr(tr_hdr, orig_len, old_rq, server->cipher_type);
- rc = crypt_message(server, num_rqst, new_rq, 1); + rc = crypt_message(server, num_rqst, new_rq, 1, server->secmech.enc); cifs_dbg(FYI, "Encrypt message returned %d\n", rc); if (rc) goto err_free; @@ -4513,8 +4499,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, unsigned int buf_data_size, struct iov_iter *iter, bool is_offloaded) { - struct kvec iov[2]; + struct crypto_aead *tfm; struct smb_rqst rqst = {NULL}; + struct kvec iov[2]; size_t iter_size = 0; int rc;
@@ -4530,9 +4517,31 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, iter_size = iov_iter_count(iter); }
- rc = crypt_message(server, 1, &rqst, 0); + if (is_offloaded) { + if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) + tfm = crypto_alloc_aead("gcm(aes)", 0, 0); + else + tfm = crypto_alloc_aead("ccm(aes)", 0, 0); + if (IS_ERR(tfm)) { + rc = PTR_ERR(tfm); + cifs_server_dbg(VFS, "%s: Failed alloc decrypt TFM, rc=%d\n", __func__, rc); + + return rc; + } + } else { + if (unlikely(!server->secmech.dec)) + return -EIO; + + tfm = server->secmech.dec; + } + + rc = crypt_message(server, 1, &rqst, 0, tfm); cifs_dbg(FYI, "Decrypt message returned %d\n", rc);
+ if (is_offloaded) + crypto_free_aead(tfm); + if (rc) return rc;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 88dc49d670371..3d9e6e15dd900 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1265,6 +1265,12 @@ SMB2_negotiate(const unsigned int xid, else cifs_server_dbg(VFS, "Missing expected negotiate contexts\n"); } + + if (server->cipher_type && !rc) { + rc = smb3_crypto_aead_allocate(server); + if (rc) + cifs_server_dbg(VFS, "%s: crypto alloc failed, rc=%d\n", __func__, rc); + } neg_exit: free_rsp_buf(resp_buftype, rsp); return rc;
From: Andrey Shumilin shum.sdl@nppct.ru
[ Upstream commit 9cf14f5a2746c19455ce9cb44341b5527b5e19c3 ]
The values of the variables xres and yres are placed in strbuf. These variables are obtained from strbuf1. The strbuf1 array contains digit characters and a space if the array contains non-digit characters. Then, when executing sprintf(strbuf, "%ux%ux8", xres, yres); more than 16 bytes will be written to strbuf. It is suggested to increase the size of the strbuf array to 24.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Signed-off-by: Andrey Shumilin shum.sdl@nppct.ru Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/video/fbdev/sis/sis_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 009bf1d926448..75033e6be15ab 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -183,7 +183,7 @@ static void sisfb_search_mode(char *name, bool quiet) { unsigned int j = 0, xres = 0, yres = 0, depth = 0, rate = 0; int i = 0; - char strbuf[16], strbuf1[20]; + char strbuf[24], strbuf1[20]; char *nameptr = name;
/* We don't know the hardware specs yet and there is no ivideo */
linux-stable-mirror@lists.linaro.org