The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 5290e88ba2c742ca77c5f5b690e5af549cfd8591
Gitweb: https://git.kernel.org/tip/5290e88ba2c742ca77c5f5b690e5af549cfd8591
Author: Steve Wahl <steve.wahl(a)hpe.com>
AuthorDate: Mon, 07 Aug 2023 09:17:30 -05:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Mon, 11 Sep 2023 10:06:22 -07:00
x86/platform/uv: Use alternate source for socket to node data
The UV code attempts to build a set of tables to allow it to do
bidirectional socket<=>node lookups.
But when nr_cpus is set to a smaller number than actually present, the
cpu_to_node() mapping information for unused CPUs is not available to
build_socket_tables(). This results in skipping some nodes or sockets
when creating the tables and leaving some -1's for later code to trip
over, causing oopses.
The problem is that the socket<=>node lookups are created by doing a
loop over all CPUs, then looking up the CPU's APICID and socket. But
if a CPU is not present, there is no way to start this lookup.
Instead of looping over all CPUs, take CPUs out of the equation
entirely. Loop over all APICIDs which are mapped to a valid NUMA node.
Then just extract the socket-id from the APICID.
This avoids tripping over disabled CPUs.
Fixes: 8a50c5851927 ("x86/platform/uv: UV support for sub-NUMA clustering")
Signed-off-by: Steve Wahl <steve.wahl(a)hpe.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20230807141730.1117278-1-steve.wahl%40hpe.com
---
arch/x86/kernel/apic/x2apic_uv_x.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d9f5d74..205cee5 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1533,7 +1533,7 @@ static void __init build_socket_tables(void)
{
struct uv_gam_range_entry *gre = uv_gre_table;
int nums, numn, nump;
- int cpu, i, lnid;
+ int i, lnid, apicid;
int minsock = _min_socket;
int maxsock = _max_socket;
int minpnode = _min_pnode;
@@ -1584,15 +1584,14 @@ static void __init build_socket_tables(void)
/* Set socket -> node values: */
lnid = NUMA_NO_NODE;
- for_each_possible_cpu(cpu) {
- int nid = cpu_to_node(cpu);
- int apicid, sockid;
+ for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) {
+ int nid = __apicid_to_node[apicid];
+ int sockid;
- if (lnid == nid)
+ if ((nid == NUMA_NO_NODE) || (lnid == nid))
continue;
lnid = nid;
- apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
if (_socket_to_node[sockid - minsock] == SOCK_EMPTY)
After the commit in Fixes:, if a module that created a slab cache does not
release all of its allocated objects before destroying the cache (at rmmod
time), we might end up releasing the kmem_cache object without removing it
from the slab_caches list thus corrupting the list as kmem_cache_destroy()
ignores the return value from shutdown_cache(), which in turn never removes
the kmem_cache object from the slab_caches list in case __kmem_cache_shutdown() fails
to release all of the cache's slabs.
This is easily observable on a kernel built with CONFIG_DEBUG_LIST=y
as after that ill release the system will immediately trip on list_add,
or list_del, assertions similar to the one shown below as soon as another
kmem_cache gets created, or destroyed:
[ 1041.213632] list_del corruption. next->prev should be ffff89f596fb5768, but was 52f1e5016aeee75d. (next=ffff89f595a1b268)
[ 1041.219165] ------------[ cut here ]------------
[ 1041.221517] kernel BUG at lib/list_debug.c:62!
[ 1041.223452] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[ 1041.225408] CPU: 2 PID: 1852 Comm: rmmod Kdump: loaded Tainted: G B W OE 6.5.0 #15
[ 1041.228244] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
[ 1041.231212] RIP: 0010:__list_del_entry_valid+0xae/0xb0
Another quick way to trigger this issue, in a kernel with CONFIG_SLUB=y,
is to set slub_debug to poison the released objects and then just run
cat /proc/slabinfo after removing the module that leaks slab objects,
in which case the kernel will panic:
[ 50.954843] general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 [#1] PREEMPT SMP PTI
[ 50.961545] CPU: 2 PID: 1495 Comm: cat Kdump: loaded Tainted: G B W OE 6.5.0 #15
[ 50.966808] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
[ 50.972663] RIP: 0010:get_slabinfo+0x42/0xf0
This patch fixes this issue by properly checking shutdown_cache()'s
return value before taking the kmem_cache_release() branch.
Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock")
Signed-off-by: Rafael Aquini <aquini(a)redhat.com>
Cc: stable(a)vger.kernel.org
---
mm/slab_common.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index cd71f9581e67..31e581dc6e85 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s)
{
- int refcnt;
+ int err;
bool rcu_set;
if (unlikely(!s) || !kasan_check_byte(s))
@@ -490,17 +490,20 @@ void kmem_cache_destroy(struct kmem_cache *s)
rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
- refcnt = --s->refcount;
- if (refcnt)
+ s->refcount--;
+ if (s->refcount) {
+ err = -EBUSY;
goto out_unlock;
+ }
- WARN(shutdown_cache(s),
+ err = shutdown_cache(s);
+ WARN(err,
"%s %s: Slab cache still has objects when called from %pS",
__func__, s->name, (void *)_RET_IP_);
out_unlock:
mutex_unlock(&slab_mutex);
cpus_read_unlock();
- if (!refcnt && !rcu_set)
+ if (!err && !rcu_set)
kmem_cache_release(s);
}
EXPORT_SYMBOL(kmem_cache_destroy);
--
2.41.0
stable-rc/linux-5.4.y build: 17 builds: 0 failed, 17 passed, 34 warnings (v5.4.256-240-gedabcd6633bf)
Full Build Summary: https://kernelci.org/build/stable-rc/branch/linux-5.4.y/kernel/v5.4.256-240…
Tree: stable-rc
Branch: linux-5.4.y
Git Describe: v5.4.256-240-gedabcd6633bf
Git Commit: edabcd6633bf9949283fee7b8ed58348f97cb549
Git URL: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Built: 7 unique architectures
Warnings Detected:
arc:
arm64:
defconfig (gcc-10): 3 warnings
defconfig+arm64-chromebook (gcc-10): 4 warnings
arm:
imx_v6_v7_defconfig (gcc-10): 1 warning
omap2plus_defconfig (gcc-10): 1 warning
i386:
allnoconfig (gcc-10): 2 warnings
i386_defconfig (gcc-10): 3 warnings
tinyconfig (gcc-10): 2 warnings
mips:
riscv:
x86_64:
allnoconfig (gcc-10): 4 warnings
tinyconfig (gcc-10): 4 warnings
x86_64_defconfig (gcc-10): 5 warnings
x86_64_defconfig+x86-chromebook (gcc-10): 5 warnings
Warnings summary:
7 ld: warning: creating DT_TEXTREL in a PIE
7 fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
4 ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
4 arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
3 ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
2 arch/x86/entry/entry_64.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.
2 arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x1c1: unsupported intra-function call
2 arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x151: unsupported intra-function call
2 arch/x86/entry/entry_64.S:1756: Warning: no instruction mnemonic suffix given and no register operands; using default for `sysret'
1 drivers/gpu/drm/mediatek/mtk_drm_gem.c:273:10: warning: returning ‘int’ from a function with return type ‘void *’ makes pointer from integer without a cast [-Wint-conversion]
Section mismatches summary:
1 WARNING: vmlinux.o(___ksymtab_gpl+vic_init_cascaded+0x0): Section mismatch in reference from the variable __ksymtab_vic_init_cascaded to the function .init.text:vic_init_cascaded()
================================================================================
Detailed per-defconfig build reports:
--------------------------------------------------------------------------------
32r2el_defconfig (mips, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
allnoconfig (x86_64, gcc-10) — PASS, 0 errors, 4 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.S:1756: Warning: no instruction mnemonic suffix given and no register operands; using default for `sysret'
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x151: unsupported intra-function call
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
allnoconfig (i386, gcc-10) — PASS, 0 errors, 2 warnings, 0 section mismatches
Warnings:
ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
defconfig (riscv, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
defconfig (arm64, gcc-10) — PASS, 0 errors, 3 warnings, 0 section mismatches
Warnings:
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
--------------------------------------------------------------------------------
defconfig+arm64-chromebook (arm64, gcc-10) — PASS, 0 errors, 4 warnings, 0 section mismatches
Warnings:
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
arch/arm64/include/asm/memory.h:238:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
drivers/gpu/drm/mediatek/mtk_drm_gem.c:273:10: warning: returning ‘int’ from a function with return type ‘void *’ makes pointer from integer without a cast [-Wint-conversion]
--------------------------------------------------------------------------------
haps_hs_smp_defconfig (arc, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
i386_defconfig (i386, gcc-10) — PASS, 0 errors, 3 warnings, 0 section mismatches
Warnings:
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
imx_v6_v7_defconfig (arm, gcc-10) — PASS, 0 errors, 1 warning, 0 section mismatches
Warnings:
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
--------------------------------------------------------------------------------
multi_v5_defconfig (arm, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
Section mismatches:
WARNING: vmlinux.o(___ksymtab_gpl+vic_init_cascaded+0x0): Section mismatch in reference from the variable __ksymtab_vic_init_cascaded to the function .init.text:vic_init_cascaded()
--------------------------------------------------------------------------------
multi_v7_defconfig (arm, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
omap2plus_defconfig (arm, gcc-10) — PASS, 0 errors, 1 warning, 0 section mismatches
Warnings:
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
--------------------------------------------------------------------------------
tinyconfig (i386, gcc-10) — PASS, 0 errors, 2 warnings, 0 section mismatches
Warnings:
ld: arch/x86/boot/compressed/head_32.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
tinyconfig (x86_64, gcc-10) — PASS, 0 errors, 4 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.S:1756: Warning: no instruction mnemonic suffix given and no register operands; using default for `sysret'
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x151: unsupported intra-function call
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
vexpress_defconfig (arm, gcc-10) — PASS, 0 errors, 0 warnings, 0 section mismatches
--------------------------------------------------------------------------------
x86_64_defconfig (x86_64, gcc-10) — PASS, 0 errors, 5 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x1c1: unsupported intra-function call
arch/x86/entry/entry_64.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
--------------------------------------------------------------------------------
x86_64_defconfig+x86-chromebook (x86_64, gcc-10) — PASS, 0 errors, 5 warnings, 0 section mismatches
Warnings:
arch/x86/entry/entry_64.o: warning: objtool: .entry.text+0x1c1: unsupported intra-function call
arch/x86/entry/entry_64.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.
fs/quota/dquot.c:2611:1: warning: label ‘out’ defined but not used [-Wunused-label]
ld: arch/x86/boot/compressed/head_64.o: warning: relocation in read-only section `.head.text'
ld: warning: creating DT_TEXTREL in a PIE
---
For more info write to <info(a)kernelci.org>
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Apparently Acer Chromebook C740 (BDW-ULT) doesn't have the
eDP HPD line properly connected, and thus fails the new
HPD check during eDP probe. The result is that we lose the
eDP output.
I suspect such machines would all be Chromebooks or other
Linux exclusive systems as the Windows driver likely wouldn't
work either. I did check a few other BDW machines here and
those do have eDP HPD connected, one of them even is a
different Chromebook (Samus).
To account for these funky machines let's skip the HPD check when
it looks like the eDP port is the only one using that specific AUX
channel. In case of multiple ports sharing the same AUX CH (eg. on
Asrock B250M-HDV) we still do the check and thus should correctly
ignore the eDP port in favor of the other DP port (usually a DP->VGA
converter).
Cc: stable(a)vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9264
Fixes: cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe")
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/intel_bios.c | 19 +++++++++++++++++++
drivers/gpu/drm/i915/display/intel_bios.h | 1 +
drivers/gpu/drm/i915/display/intel_dp.c | 7 ++++++-
3 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 858c959f7bab..aabecd2beb14 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -3540,6 +3540,25 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata)
return map_aux_ch(devdata->i915, devdata->child.aux_channel);
}
+/* Return true if more than one DP-supporting VBT display device uses devdata's AUX CH. */
+bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata)
+{
+	const struct intel_bios_encoder_data *other;
+	int count = 0;
+
+	if (!devdata || !devdata->child.aux_channel)
+		return false;
+
+	/* Separate cursor: the list head is re-evaluated each pass and must stay valid. */
+	list_for_each_entry(other, &devdata->i915->display.vbt.display_devices, node) {
+		if (intel_bios_encoder_supports_dp(other) &&
+		    other->child.aux_channel == devdata->child.aux_channel)
+			count++;
+	}
+
+	return count > 1;
+}
+
int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata)
{
if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost)
diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h
index 9680e3e92bb5..49e24b7cf675 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.h
+++ b/drivers/gpu/drm/i915/display/intel_bios.h
@@ -273,6 +273,7 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata);
+bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_boost_level(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 2206b45bc78c..aa5f602b56fb 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -5889,8 +5889,13 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
/*
* VBT and straps are liars. Also check HPD as that seems
* to be the most reliable piece of information available.
+ *
+ * ... except on devices that forgot to hook HPD up for eDP
+ * (eg. Acer Chromebook C740), so we'll check it only if multiple
+ * ports are attempting to use the same AUX CH, according to VBT.
*/
- if (!intel_digital_port_connected(encoder)) {
+ if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) &&
+ !intel_digital_port_connected(encoder)) {
/*
* If this fails, presume the DPCD answer came
* from some other port using the same AUX CH.
--
2.41.0