The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 598c2fafc06fe5c56a1a415fb7b544b31453d637
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040111-friend-dispersal-bc2a@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 598c2fafc06fe5c56a1a415fb7b544b31453d637 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:01:45 +0530
Subject: [PATCH] perf/x86/amd/lbr: Use freeze based on availability
Currently, the LBR code assumes that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set,
which may not be set for all Zen 4 processors.
Define a new feature bit for LBR and PMC freeze and set the freeze enable bit
(FLBRI) in DebugCtl (MSR 0x1d9) conditionally.
It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using an user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.
E.g.
$ perf record -j any,u -e ex_ret_brn_tkn ./workload
Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.
Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.17110915…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..5692e827afef 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..5149830c7c4f 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4d850a780f7e..a38f8f9ba657 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -459,6 +459,14 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..a515328d9d7d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 598c2fafc06fe5c56a1a415fb7b544b31453d637
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040110-spongy-stress-e02e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 598c2fafc06fe5c56a1a415fb7b544b31453d637 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:01:45 +0530
Subject: [PATCH] perf/x86/amd/lbr: Use freeze based on availability
Currently, the LBR code assumes that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set,
which may not be set for all Zen 4 processors.
Define a new feature bit for LBR and PMC freeze and set the freeze enable bit
(FLBRI) in DebugCtl (MSR 0x1d9) conditionally.
It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using an user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.
E.g.
$ perf record -j any,u -e ex_ret_brn_tkn ./workload
Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.
Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.17110915…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..5692e827afef 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..5149830c7c4f 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4d850a780f7e..a38f8f9ba657 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -459,6 +459,14 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..a515328d9d7d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040100-clinking-stylized-0888@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040114-diaper-unlovable-0dab@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
When ident_pud_init() uses only gbpages to create identity maps, large
ranges of addresses not actually requested can be included in the
resulting table; a 4K request will map a full GB. On UV systems, this
ends up including regions that will cause hardware to halt the system
if accessed (these are marked "reserved" by BIOS). Even processor
speculation into these regions is enough to trigger the system halt.
And MTRRs cannot be used to restrict this speculation, there are not
enough MTRRs to cover all the reserved regions.
The fix for that would be to only use gbpages when map creation
requests include the full GB page of space, and falling back to using
smaller 2M pages when only portions of a GB page are included in the
request.
But on some other systems, possibly due to buggy bios, that solution
leaves some areas out of the identity map that are needed for kexec to
succeed. It is believed that these areas are not marked properly for
map_acpi_tables() in arch/x86/kernel/machine_kexec_64.c to catch and
map them. The nogbpages kernel command line option also causes these
systems to fail even without these changes.
So, create kexec identity maps using full GB pages on all platforms
but UV; on UV, use narrower 2MB pages in the identity map where a full
GB page would include areas outside the region requested.
No attempt is made to coalesce mapping requests. If a request requires
a map entry at the 2M (pmd) level, subsequent mapping requests within
the same 1G region will also be at the pmd level, even if adjacent or
overlapping such requests could have been combined to map a full
gbpage. Existing usage starts with larger regions and then adds
smaller regions, so this should not have any great consequence.
Signed-off-by: Steve Wahl <steve.wahl(a)hpe.com>
Fixes: d794734c9bbf ("x86/mm/ident_map: Use gbpages only where full GB page should be mapped.")
Reported-by: Pavin Joseph <me(a)pavinjoseph.com>
Closes: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjosep…
Link: https://lore.kernel.org/all/20240322162135.3984233-1-steve.wahl@hpe.com/
Tested-by: Pavin Joseph <me(a)pavinjoseph.com>
Tested-by: Eric Hagberg <ehagberg(a)gmail.com>
Tested-by: Sarah Brofeldt <srhb(a)dbc.dk>
---
v4: Incorporate fix for regression on systems relying on gbpages
mapping more than the ranges actually requested for successful
kexec, by limiting the effects of the change to UV systems.
This patch based on tip/x86/urgent.
v3: per Dave Hansen review, re-arrange changelog info,
refactor code to use bool variable and split out conditions.
v2: per Dave Hansen review: Additional changelog info,
moved pud_large() check earlier in the code, and
improved the comment describing the conditions
that restrict gbpage usage.
arch/x86/include/asm/init.h | 1 +
arch/x86/kernel/machine_kexec_64.c | 10 ++++++++++
arch/x86/mm/ident_map.c | 24 +++++++++++++++++++-----
3 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index cc9ccf61b6bd..371d9faea8bc 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -10,6 +10,7 @@ struct x86_mapping_info {
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
bool direct_gbpages; /* PUD level 1GB page support */
+ bool direct_gbpages_only; /* use 1GB pages exclusively */
unsigned long kernpg_flag; /* kernel pagetable flag override */
};
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b180d8e497c3..3a2f5d291a88 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -28,6 +28,7 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/cpu.h>
+#include <asm/uv/uv.h>
#ifdef CONFIG_ACPI
/*
@@ -212,6 +213,15 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
if (direct_gbpages)
info.direct_gbpages = true;
+ /*
+ * UV systems need restrained use of gbpages in the identity
+ * maps to avoid system halts. But some other systems rely on
+ * using gbpages to expand mappings outside the regions
+ * actually listed, to include areas required for kexec but
+ * not explicitly named by the bios.
+ */
+ if (!is_uv_system())
+ info.direct_gbpages_only = true;
for (i = 0; i < nr_pfn_mapped; i++) {
mstart = pfn_mapped[i].start << PAGE_SHIFT;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..a538a54aba5d 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,32 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_leaf(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ if (!info->direct_gbpages_only) {
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the beginning: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+ }
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
base-commit: b6540de9b5c867b4c8bc31225db181cc017d8cc7
--
2.26.2
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040106-slacking-uncanny-50dc@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040105-simply-footpath-ff09@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040104-avid-embolism-6b9b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040103-scholar-tall-0cf6@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8e68a458bcf5b5cb9c3624598bae28f08251601f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040152-bobtail-animate-4d38@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8e68a458bcf5 ("scsi: libsas: Fix disk not being scanned in after being removed")
d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
7b27c5fe247b ("scsi: libsas: Stop hardcoding SAS address length")
15ba7806c316 ("scsi: libsas: Drop SAS_DPRINTK() and revise logs levels")
71a4a9923122 ("scsi: libsas: Drop sas_printk()")
d188e5db9d27 ("scsi: libsas: Use pr_fmt(fmt)")
32c850bf587f ("scsi: libsas: always unregister the old device if going to discover new")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e68a458bcf5b5cb9c3624598bae28f08251601f Mon Sep 17 00:00:00 2001
From: Xingui Yang <yangxingui(a)huawei.com>
Date: Thu, 7 Mar 2024 14:14:13 +0000
Subject: [PATCH] scsi: libsas: Fix disk not being scanned in after being
removed
As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to
update PHY info"), do discovery will send a new SMP_DISCOVER and update
phy->phy_change_count. We found that if the disk is reconnected and phy
change_count changes at this time, the disk scanning process will not be
triggered.
Therefore, call sas_set_ex_phy() to update the PHY info with the results of
the last query. And because the previous phy info will be used when calling
sas_unregister_devs_sas_addr(), sas_unregister_devs_sas_addr() should be
called before sas_set_ex_phy().
Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
Signed-off-by: Xingui Yang <yangxingui(a)huawei.com>
Link: https://lore.kernel.org/r/20240307141413.48049-3-yangxingui@huawei.com
Reviewed-by: John Garry <john.g.garry(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index de9dee488277..5c261005b74e 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1945,6 +1945,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
struct expander_device *ex = &dev->ex_dev;
struct ex_phy *phy = &ex->ex_phy[phy_id];
enum sas_device_type type = SAS_PHY_UNUSED;
+ struct smp_disc_resp *disc_resp;
u8 sas_addr[SAS_ADDR_SIZE];
char msg[80] = "";
int res;
@@ -1956,33 +1957,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(dev->sas_addr), phy_id, msg);
memset(sas_addr, 0, SAS_ADDR_SIZE);
- res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
+ disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+ if (!disc_resp)
+ return -ENOMEM;
+
+ res = sas_get_phy_discover(dev, phy_id, disc_resp);
switch (res) {
case SMP_RESP_NO_PHY:
phy->phy_state = PHY_NOT_PRESENT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_PHY_VACANT:
phy->phy_state = PHY_VACANT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_FUNC_ACC:
break;
case -ECOMM:
break;
default:
- return res;
+ goto out_free_resp;
}
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type);
+
if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
phy->phy_state = PHY_EMPTY;
sas_unregister_devs_sas_addr(dev, phy_id, last);
/*
- * Even though the PHY is empty, for convenience we discover
- * the PHY to update the PHY info, like negotiated linkrate.
+ * Even though the PHY is empty, for convenience we update
+ * the PHY info, like negotiated linkrate.
*/
- sas_ex_phy_discover(dev, phy_id);
- return res;
+ if (res == 0)
+ sas_set_ex_phy(dev, phy_id, disc_resp);
+ goto out_free_resp;
} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
dev_type_flutter(type, phy->attached_dev_type)) {
struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
@@ -1994,7 +2003,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
action = ", needs recovery";
pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
SAS_ADDR(dev->sas_addr), phy_id, action);
- return res;
+ goto out_free_resp;
}
/* we always have to delete the old device when we went here */
@@ -2003,7 +2012,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(phy->attached_sas_addr));
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return sas_discover_new(dev, phy_id);
+ res = sas_discover_new(dev, phy_id);
+out_free_resp:
+ kfree(disc_resp);
+ return res;
}
/**