April 2022 - Linux-stable-mirror

FAILED: patch "[PATCH] scsi: qla2xxx: Fix laggy FC remote port session recovery" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 713b415726f100f6644971e75ebfe1edbef1a390 Mon Sep 17 00:00:00 2001 From: Quinn Tran <qutran(a)marvell.com> Date: Thu, 10 Mar 2022 01:25:59 -0800 Subject: [PATCH] scsi: qla2xxx: Fix laggy FC remote port session recovery For session recovery, driver relies on the dpc thread to initiate certain operations. The dpc thread runs exclusively without the Mailbox interface being occupied. A recent code change for heartbeat check via mailbox cmd 0 is preventing the dpc thread from carrying out its operation. This patch allows the higher priority error recovery to run first before running the lower priority heartbeat check. Link: https://lore.kernel.org/r/20220310092604.22950-9-njavali@marvell.com Fixes: d94d8158e184 ("scsi: qla2xxx: Add heartbeat check") Cc: stable(a)vger.kernel.org Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com> Signed-off-by: Quinn Tran <qutran(a)marvell.com> Signed-off-by: Nilesh Javali <njavali(a)marvell.com> Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8aa1cccebab1..d76c0e9f114c 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4624,6 +4624,7 @@ struct qla_hw_data { struct workqueue_struct *wq; struct work_struct heartbeat_work; struct qlfc_fw fw_buf; + unsigned long last_heartbeat_run_jiffies; /* FCP_CMND priority support */ struct qla_fcp_prio_cfg *fcp_prio_cfg; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 9c4f2b38b34e..81451c11eef4 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7215,7 +7215,7 @@ static bool qla_do_heartbeat(struct scsi_qla_host *vha) return do_heartbeat; } -static void qla_heart_beat(struct scsi_qla_host *vha) +static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started) { struct qla_hw_data *ha = vha->hw; @@ -7225,8 +7225,19 @@ static void qla_heart_beat(struct scsi_qla_host *vha) if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) return; - if (qla_do_heartbeat(vha)) + /* + * dpc thread cannot run if heartbeat is running at the same time. + * We also do not want to starve heartbeat task. Therefore, do + * heartbeat task at least once every 5 seconds. + */ + if (dpc_started && + time_before(jiffies, ha->last_heartbeat_run_jiffies + 5 * HZ)) + return; + + if (qla_do_heartbeat(vha)) { + ha->last_heartbeat_run_jiffies = jiffies; queue_work(ha->wq, &ha->heartbeat_work); + } } /************************************************************************** @@ -7417,6 +7428,8 @@ qla2x00_timer(struct timer_list *t) start_dpc++; } + /* borrowing w to signify dpc will run */ + w = 0; /* Schedule the DPC routine if needed */ if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) || @@ -7449,9 +7462,10 @@ qla2x00_timer(struct timer_list *t) test_bit(RELOGIN_NEEDED, &vha->dpc_flags), test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags)); qla2xxx_wake_dpc(vha); + w = 1; } - qla_heart_beat(vha); + qla_heart_beat(vha, w); qla2x00_restart_timer(vha, WATCH_INTERVAL); }

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] scsi: qla2xxx: Fix hang due to session stuck" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From c02aada06d19a215c8291bd968a99a270e96f734 Mon Sep 17 00:00:00 2001 From: Quinn Tran <qutran(a)marvell.com> Date: Thu, 10 Mar 2022 01:25:58 -0800 Subject: [PATCH] scsi: qla2xxx: Fix hang due to session stuck User experienced device lost. The log shows Get port data base command was queued up, failed, and requeued again. Every time it is requeued, it set the FCF_ASYNC_ACTIVE. This prevents any recovery code from occurring because driver thinks a recovery is in progress for this session. In essence, this session is hung. The reason it gets into this place is the session deletion got in front of this call due to link perturbation. Break the requeue cycle and exit. The session deletion code will trigger a session relogin. Link: https://lore.kernel.org/r/20220310092604.22950-8-njavali@marvell.com Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: stable(a)vger.kernel.org Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com> Signed-off-by: Quinn Tran <qutran(a)marvell.com> Signed-off-by: Nilesh Javali <njavali(a)marvell.com> Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index bab2f665b6c2..8aa1cccebab1 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -5438,4 +5438,8 @@ struct ql_vnd_tgt_stats_resp { #include "qla_gbl.h" #include "qla_dbg.h" #include "qla_inline.h" + +#define IS_SESSION_DELETED(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \ + _fcport->disc_state == DSC_DELETED) + #endif diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index e468b05f90c0..5dfaa4d39cec 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -575,6 +575,14 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; + if (IS_SESSION_DELETED(fcport)) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; + return rval; + } + if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; @@ -1338,8 +1346,15 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) struct port_database_24xx *pd; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) || - fcport->loop_id == FC_NO_LOOP_ID) { + if (IS_SESSION_DELETED(fcport)) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; + return rval; + } + + if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) { ql_log(ql_log_warn, vha, 0xffff, "%s: %8phC online %d flags %x - not sending command.\n", __func__, fcport->port_name, vha->flags.online, fcport->flags);

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] scsi: qla2xxx: Fix hang due to session stuck" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From c02aada06d19a215c8291bd968a99a270e96f734 Mon Sep 17 00:00:00 2001 From: Quinn Tran <qutran(a)marvell.com> Date: Thu, 10 Mar 2022 01:25:58 -0800 Subject: [PATCH] scsi: qla2xxx: Fix hang due to session stuck User experienced device lost. The log shows Get port data base command was queued up, failed, and requeued again. Every time it is requeued, it set the FCF_ASYNC_ACTIVE. This prevents any recovery code from occurring because driver thinks a recovery is in progress for this session. In essence, this session is hung. The reason it gets into this place is the session deletion got in front of this call due to link perturbation. Break the requeue cycle and exit. The session deletion code will trigger a session relogin. Link: https://lore.kernel.org/r/20220310092604.22950-8-njavali@marvell.com Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: stable(a)vger.kernel.org Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com> Signed-off-by: Quinn Tran <qutran(a)marvell.com> Signed-off-by: Nilesh Javali <njavali(a)marvell.com> Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index bab2f665b6c2..8aa1cccebab1 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -5438,4 +5438,8 @@ struct ql_vnd_tgt_stats_resp { #include "qla_gbl.h" #include "qla_dbg.h" #include "qla_inline.h" + +#define IS_SESSION_DELETED(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \ + _fcport->disc_state == DSC_DELETED) + #endif diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index e468b05f90c0..5dfaa4d39cec 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -575,6 +575,14 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; + if (IS_SESSION_DELETED(fcport)) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; + return rval; + } + if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; @@ -1338,8 +1346,15 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) struct port_database_24xx *pd; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) || - fcport->loop_id == FC_NO_LOOP_ID) { + if (IS_SESSION_DELETED(fcport)) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; + return rval; + } + + if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) { ql_log(ql_log_warn, vha, 0xffff, "%s: %8phC online %d flags %x - not sending command.\n", __func__, fcport->port_name, vha->flags.online, fcport->flags);

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/tm: Fix more userspace r13 corruption" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin(a)gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable(a)vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com> Acked-by: Michael Neuling <mikey(a)neuling.org> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/tm: Fix more userspace r13 corruption" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin(a)gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable(a)vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com> Acked-by: Michael Neuling <mikey(a)neuling.org> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/tm: Fix more userspace r13 corruption" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin(a)gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable(a)vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com> Acked-by: Michael Neuling <mikey(a)neuling.org> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/tm: Fix more userspace r13 corruption" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin(a)gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable(a)vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com> Acked-by: Michael Neuling <mikey(a)neuling.org> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/tm: Fix more userspace r13 corruption" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin(a)gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable(a)vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com> Acked-by: Michael Neuling <mikey(a)neuling.org> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/tm: Fix more userspace r13 corruption" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin(a)gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable(a)vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com> Acked-by: Michael Neuling <mikey(a)neuling.org> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/tm: Fix more userspace r13 corruption" failed to apply to 5.16-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.16-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin(a)gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable(a)vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com> Acked-by: Michael Neuling <mikey(a)neuling.org> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1

3 years, 8 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror April 2022