When a panic happens while the BAU is active, there is a chance that outstanding broadcasts will tie up enough BAU resources to cause timeouts in the UV ASIC. These timeouts are hardware errors that immediately bring down the system, preventing kdump from completing.
Add uv_bau_crash_shutdown() to bring BAU to quiescence during panic before continuing with the native crash shutdown. Assign uv_bau_crash_shutdown to machine_ops during init.
Signed-off-by: Andrew Banman <abanman@hpe.com> --- arch/x86/platform/uv/tlb_uv.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index b36caae..e7f9aea 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -22,6 +22,7 @@ #include <asm/tsc.h> #include <asm/irq_vectors.h> #include <asm/timer.h> +#include <asm/reboot.h>
static struct bau_operations ops __ro_after_init;
@@ -2197,6 +2198,32 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode) .wait_completion = uv4_wait_completion, };
+#ifdef CONFIG_KEXEC_CORE +/* + * Bring BAU to quiesence by disabling future broadcasts and abandoning + * current broadcasts during panic. + */ +void uv_bau_crash_shutdown(struct pt_regs *regs) +{ + set_bau_off(); + nobau_perm = 1; + + for_each_possible_blade(uvhub) { + if (!uv_blade_nr_possible_cpus(uvhub)) + continue; + int pnode = uv_blade_to_pnode(uvhub); + /* Set STATUS registers to idle to free source cpus */ + write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_0, 0x0); + write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_1, 0x0); + write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_2, 0x0); + /* Clear TIMEOUT and PENDING bits to free up BAU resources */ + ops.write_g_sw_ack(pnode, ops.read_g_sw_ack(pnode) & 0xFFFF); + } + + native_machine_crash_shutdown(regs); +} +#endif + /* * Initialization of BAU-related structures */ @@ -2269,6 +2296,10 @@ static int __init uv_bau_init(void) } }
+#ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = uv_bau_crash_shutdown; +#endif + return 0;
err_bau_disable:
On Sun, Jun 03, 2018 at 11:42:32AM -0500, Andrew Banman wrote:
When a panic happens while the BAU is active, there is a chance that outstanding broadcasts will tie up enough BAU resources to cause timeouts in the UV ASIC. These timeouts are hardware errors that immediately bring down the system, preventing kdump from completing.
Add uv_bau_crash_shutdown() to bring BAU to quiescence during panic before continuing with the native crash shutdown. Assign uv_bau_crash_shutdown to machine_ops during init.
Signed-off-by: Andrew Banman <abanman@hpe.com>
arch/x86/platform/uv/tlb_uv.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+)
<formletter>
This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly.
</formletter>
Hi Andrew,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on tip/auto-latest] [also build test ERROR on v4.17-rc7 next-20180601] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Andrew-Banman/x86-platform-uv-BAU-g... config: x86_64-allyesconfig (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64
All error/warnings (new ones prefixed by >>):
In file included from arch/x86/platform/uv/tlb_uv.c:19:0: arch/x86/platform/uv/tlb_uv.c: In function 'uv_bau_crash_shutdown':
arch/x86/platform/uv/tlb_uv.c:2211:26: error: 'uvhub' undeclared (first use in this function)
for_each_possible_blade(uvhub) { ^ arch/x86/include/asm/uv/uv_hub.h:437:8: note: in definition of macro 'for_each_possible_blade' for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++) ^~~ arch/x86/platform/uv/tlb_uv.c:2211:26: note: each undeclared identifier is reported only once for each function it appears in for_each_possible_blade(uvhub) { ^ arch/x86/include/asm/uv/uv_hub.h:437:8: note: in definition of macro 'for_each_possible_blade' for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++) ^~~
arch/x86/platform/uv/tlb_uv.c:2214:3: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
int pnode = uv_blade_to_pnode(uvhub); ^~~
vim +/uvhub +2211 arch/x86/platform/uv/tlb_uv.c
2200 2201 #ifdef CONFIG_KEXEC_CORE 2202 /* 2203 * Bring BAU to quiesence by disabling future broadcasts and abandoning 2204 * current broadcasts during panic. 2205 */ 2206 void uv_bau_crash_shutdown(struct pt_regs *regs) 2207 { 2208 set_bau_off(); 2209 nobau_perm = 1; 2210
2211 for_each_possible_blade(uvhub) {
2212 if (!uv_blade_nr_possible_cpus(uvhub)) 2213 continue;
2214 int pnode = uv_blade_to_pnode(uvhub);
2215 /* Set STATUS registers to idle to free source cpus */ 2216 write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_0, 0x0); 2217 write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_1, 0x0); 2218 write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_2, 0x0); 2219 /* Clear TIMEOUT and PENDING bits to free up BAU resources */ 2220 ops.write_g_sw_ack(pnode, ops.read_g_sw_ack(pnode) & 0xFFFF); 2221 } 2222 2223 native_machine_crash_shutdown(regs); 2224 } 2225 #endif 2226
--- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
linux-stable-mirror@lists.linaro.org