The patch below does not apply to the 4.14-, 4.19-, 5.4-, 5.10-, or 5.16-stable trees.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From db212f2eb3fb7f546366777e93c8f54614d39269 Mon Sep 17 00:00:00 2001
From: Arun Easi <aeasi@marvell.com>
Date: Thu, 10 Mar 2022 01:25:54 -0800
Subject: [PATCH] scsi: qla2xxx: Fix loss of NVMe namespaces after driver
reload test
Driver registration of the localport can race when it happens at remote
port discovery time. Fix this by performing the registration under a mutex.
Link: https://lore.kernel.org/r/20220310092604.22950-4-njavali@marvell.com
Fixes: e84067d74301 ("scsi: qla2xxx: Add FC-NVMe F/W initialization and transport registration")
Cc: stable@vger.kernel.org
Reported-by: Marco Patalano <mpatalan@redhat.com>
Tested-by: Marco Patalano <mpatalan@redhat.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Arun Easi <aeasi@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 5723082d94d6..3bf5cbd754a7 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -782,8 +782,6 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha)
ha = vha->hw;
tmpl = &qla_nvme_fc_transport;
- WARN_ON(vha->nvme_local_port);
-
if (ql2xnvme_queues < MIN_NVME_HW_QUEUES || ql2xnvme_queues > MAX_NVME_HW_QUEUES) {
ql_log(ql_log_warn, vha, 0xfffd,
"ql2xnvme_queues=%d is out of range(MIN:%d - MAX:%d). Resetting ql2xnvme_queues to:%d\n",
@@ -797,7 +795,7 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha)
(uint8_t)(ha->max_qpairs ? ha->max_qpairs : 1));
ql_log(ql_log_info, vha, 0xfffb,
- "Number of NVME queues used for this port: %d\n",
+ "Number of NVME queues used for this port: %d\n",
qla_nvme_fc_transport.max_hw_queues);
pinfo.node_name = wwn_to_u64(vha->node_name);
@@ -805,13 +803,25 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha)
pinfo.port_role = FC_PORT_ROLE_NVME_INITIATOR;
pinfo.port_id = vha->d_id.b24;
- ql_log(ql_log_info, vha, 0xffff,
- "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n",
- pinfo.node_name, pinfo.port_name, pinfo.port_id);
- qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary;
-
- ret = nvme_fc_register_localport(&pinfo, tmpl,
- get_device(&ha->pdev->dev), &vha->nvme_local_port);
+ mutex_lock(&ha->vport_lock);
+ /*
+ * Check again for nvme_local_port to see if any other thread raced
+ * with this one and finished registration.
+ */
+ if (!vha->nvme_local_port) {
+ ql_log(ql_log_info, vha, 0xffff,
+ "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n",
+ pinfo.node_name, pinfo.port_name, pinfo.port_id);
+ qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary;
+
+ ret = nvme_fc_register_localport(&pinfo, tmpl,
+ get_device(&ha->pdev->dev),
+ &vha->nvme_local_port);
+ mutex_unlock(&ha->vport_lock);
+ } else {
+ mutex_unlock(&ha->vport_lock);
+ return 0;
+ }
if (ret) {
ql_log(ql_log_warn, vha, 0xffff,
"register_localport failed: ret=%x\n", ret);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 58ca5999e0367d131de82a75257fbfd5aed0195d Mon Sep 17 00:00:00 2001
From: Quinn Tran <qutran@marvell.com>
Date: Thu, 10 Mar 2022 01:25:52 -0800
Subject: [PATCH] scsi: qla2xxx: Fix incorrect reporting of task management
failure
A user saw no task management error reported even though the target device
was responding with an error. The RSP_CODE field in the status IOCB is
little endian, but the driver assumed big endian and picked up erroneous
data. Convert the data back to big endian, as it appears on the wire.
Link: https://lore.kernel.org/r/20220310092604.22950-2-njavali@marvell.com
Fixes: faef62d13463 ("[SCSI] qla2xxx: Fix Task Management command asynchronous handling")
Cc: stable@vger.kernel.org
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 092e4b5da65a..21b31d6359c8 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2498,6 +2498,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk)
iocb->u.tmf.data = QLA_FUNCTION_FAILED;
} else if ((le16_to_cpu(sts->scsi_status) &
SS_RESPONSE_INFO_LEN_VALID)) {
+ host_to_fcp_swap(sts->data, sizeof(sts->data));
if (le32_to_cpu(sts->rsp_data_len) < 4) {
ql_log(ql_log_warn, fcport->vha, 0x503b,
"Async-%s error - hdl=%x not enough response(%d).\n",
The patch below does not apply to the 5.10- or 5.15-stable trees.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0972252450f90db56dd5415a20e2aec21a08d036 Mon Sep 17 00:00:00 2001
From: Arun Easi <aeasi@marvell.com>
Date: Thu, 10 Mar 2022 01:25:56 -0800
Subject: [PATCH] scsi: qla2xxx: Fix crash during module load unload test
During purex packet handling the driver was incorrectly freeing a
pre-allocated structure. Fix this by skipping that entry.
System crashed with the following stack during a module unload test.
Call Trace:
sbitmap_init_node+0x7f/0x1e0
sbitmap_queue_init_node+0x24/0x150
blk_mq_init_bitmaps+0x3d/0xa0
blk_mq_init_tags+0x68/0x90
blk_mq_alloc_map_and_rqs+0x44/0x120
blk_mq_alloc_set_map_and_rqs+0x63/0x150
blk_mq_alloc_tag_set+0x11b/0x230
scsi_add_host_with_dma.cold+0x3f/0x245
qla2x00_probe_one+0xd5a/0x1b80 [qla2xxx]
Call Trace with slub_debug and debug kernel:
kasan_report_invalid_free+0x50/0x80
__kasan_slab_free+0x137/0x150
slab_free_freelist_hook+0xc6/0x190
kfree+0xe8/0x2e0
qla2x00_free_device+0x3bb/0x5d0 [qla2xxx]
qla2x00_remove_one+0x668/0xcf0 [qla2xxx]
Link: https://lore.kernel.org/r/20220310092604.22950-6-njavali@marvell.com
Fixes: 62e9dd177732 ("scsi: qla2xxx: Change in PUREX to handle FPIN ELS requests")
Cc: stable@vger.kernel.org
Reported-by: Marco Patalano <mpatalan@redhat.com>
Tested-by: Marco Patalano <mpatalan@redhat.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Arun Easi <aeasi@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 58c83525f006..9c4f2b38b34e 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -3901,6 +3901,8 @@ qla24xx_free_purex_list(struct purex_list *list)
spin_lock_irqsave(&list->lock, flags);
list_for_each_entry_safe(item, next, &list->head, list) {
list_del(&item->list);
+ if (item == &item->vha->default_item)
+ continue;
kfree(item);
}
spin_unlock_irqrestore(&list->lock, flags);
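The bug pattern here: a teardown loop frees every node on a list, but one node is embedded in a longer-lived structure rather than heap-allocated, and kfree() on that embedded node is the invalid free that KASAN flags above. A user-space sketch of the corrected loop (structure and names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item *next;
};

struct host {
	struct item default_item;	/* embedded, must never be freed */
};

/* Unlink every node, but only free() the heap-allocated ones. */
static void free_list(struct item *head, struct host *h)
{
	struct item *it = head;

	while (it) {
		struct item *next = it->next;

		if (it != &h->default_item)
			free(it);	/* heap node */
		it = next;
	}
}

int main(void)
{
	struct host h = { { NULL } };
	struct item *heap = malloc(sizeof(*heap));

	if (!heap)
		return 1;
	heap->next = &h.default_item;	/* list: heap node -> embedded node */
	free_list(heap, &h);
	puts("list torn down without freeing the embedded node");
	return 0;
}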
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 713b415726f100f6644971e75ebfe1edbef1a390 Mon Sep 17 00:00:00 2001
From: Quinn Tran <qutran@marvell.com>
Date: Thu, 10 Mar 2022 01:25:59 -0800
Subject: [PATCH] scsi: qla2xxx: Fix laggy FC remote port session recovery
For session recovery, the driver relies on the dpc thread to initiate
certain operations. The dpc thread can only run while the mailbox
interface is unoccupied. A recent change that performs a heartbeat check
via mailbox cmd 0 was preventing the dpc thread from carrying out its
work. This patch allows the higher-priority error recovery to run before
the lower-priority heartbeat check.
Link: https://lore.kernel.org/r/20220310092604.22950-9-njavali@marvell.com
Fixes: d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
Cc: stable@vger.kernel.org
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 8aa1cccebab1..d76c0e9f114c 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -4624,6 +4624,7 @@ struct qla_hw_data {
struct workqueue_struct *wq;
struct work_struct heartbeat_work;
struct qlfc_fw fw_buf;
+ unsigned long last_heartbeat_run_jiffies;
/* FCP_CMND priority support */
struct qla_fcp_prio_cfg *fcp_prio_cfg;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 9c4f2b38b34e..81451c11eef4 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -7215,7 +7215,7 @@ static bool qla_do_heartbeat(struct scsi_qla_host *vha)
return do_heartbeat;
}
-static void qla_heart_beat(struct scsi_qla_host *vha)
+static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started)
{
struct qla_hw_data *ha = vha->hw;
@@ -7225,8 +7225,19 @@ static void qla_heart_beat(struct scsi_qla_host *vha)
if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha))
return;
- if (qla_do_heartbeat(vha))
+ /*
+ * dpc thread cannot run if heartbeat is running at the same time.
+ * We also do not want to starve heartbeat task. Therefore, do
+ * heartbeat task at least once every 5 seconds.
+ */
+ if (dpc_started &&
+ time_before(jiffies, ha->last_heartbeat_run_jiffies + 5 * HZ))
+ return;
+
+ if (qla_do_heartbeat(vha)) {
+ ha->last_heartbeat_run_jiffies = jiffies;
queue_work(ha->wq, &ha->heartbeat_work);
+ }
}
/**************************************************************************
@@ -7417,6 +7428,8 @@ qla2x00_timer(struct timer_list *t)
start_dpc++;
}
+ /* borrowing w to signify dpc will run */
+ w = 0;
/* Schedule the DPC routine if needed */
if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) ||
test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) ||
@@ -7449,9 +7462,10 @@ qla2x00_timer(struct timer_list *t)
test_bit(RELOGIN_NEEDED, &vha->dpc_flags),
test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags));
qla2xxx_wake_dpc(vha);
+ w = 1;
}
- qla_heart_beat(vha);
+ qla_heart_beat(vha, w);
qla2x00_restart_timer(vha, WATCH_INTERVAL);
}
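The scheduling idea in the patch is a simple time-based yield: if higher-priority dpc work was just queued, skip the heartbeat unless it has already been deferred for the full interval (5 seconds), so the check is throttled but never starved. A user-space sketch of the same logic, using wall-clock seconds in place of jiffies and illustrative names:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static time_t last_heartbeat;	/* 0 until the heartbeat first runs */

static bool heartbeat_should_run(bool dpc_started)
{
	time_t now = time(NULL);

	/*
	 * Yield to the higher-priority dpc work, but never defer the
	 * heartbeat for more than 5 seconds in a row.
	 */
	if (dpc_started && now < last_heartbeat + 5)
		return false;

	last_heartbeat = now;
	return true;
}

int main(void)
{
	printf("first tick, dpc busy: %d\n", heartbeat_should_run(true));
	printf("next tick, dpc busy: %d\n", heartbeat_should_run(true));
	return 0;
}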
The patch below does not apply to the 4.9- or 4.14-stable trees.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c02aada06d19a215c8291bd968a99a270e96f734 Mon Sep 17 00:00:00 2001
From: Quinn Tran <qutran@marvell.com>
Date: Thu, 10 Mar 2022 01:25:58 -0800
Subject: [PATCH] scsi: qla2xxx: Fix hang due to session stuck
A user experienced device loss. The log shows the Get Port Database
command was queued up, failed, and requeued again; every time it was
requeued, it set FCF_ASYNC_ACTIVE. This prevents any recovery code from
running because the driver thinks a recovery is already in progress for
this session. In essence, the session is hung. It gets into this state
when session deletion, triggered by a link perturbation, races ahead of
this call.
Break the requeue cycle and exit; the session deletion code will trigger
a session relogin.
Link: https://lore.kernel.org/r/20220310092604.22950-8-njavali@marvell.com
Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery")
Cc: stable@vger.kernel.org
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index bab2f665b6c2..8aa1cccebab1 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -5438,4 +5438,8 @@ struct ql_vnd_tgt_stats_resp {
#include "qla_gbl.h"
#include "qla_dbg.h"
#include "qla_inline.h"
+
+#define IS_SESSION_DELETED(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \
+ _fcport->disc_state == DSC_DELETED)
+
#endif
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index e468b05f90c0..5dfaa4d39cec 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -575,6 +575,14 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport,
struct srb_iocb *lio;
int rval = QLA_FUNCTION_FAILED;
+ if (IS_SESSION_DELETED(fcport)) {
+ ql_log(ql_log_warn, vha, 0xffff,
+ "%s: %8phC is being delete - not sending command.\n",
+ __func__, fcport->port_name);
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
+ return rval;
+ }
+
if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
return rval;
@@ -1338,8 +1346,15 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
struct port_database_24xx *pd;
struct qla_hw_data *ha = vha->hw;
- if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) ||
- fcport->loop_id == FC_NO_LOOP_ID) {
+ if (IS_SESSION_DELETED(fcport)) {
+ ql_log(ql_log_warn, vha, 0xffff,
+ "%s: %8phC is being delete - not sending command.\n",
+ __func__, fcport->port_name);
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
+ return rval;
+ }
+
+ if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) {
ql_log(ql_log_warn, vha, 0xffff,
"%s: %8phC online %d flags %x - not sending command.\n",
__func__, fcport->port_name, vha->flags.online, fcport->flags);
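The fix is a guard clause: if the session is already marked for deletion, refuse to (re)issue the command and clear the async-active flag so deletion-driven recovery is no longer blocked. A compact sketch of that state check (types and names are illustrative, not the driver's):

#include <stdbool.h>
#include <stdio.h>

enum disc_state { DSC_LOGIN_COMPLETE, DSC_DELETE_PEND, DSC_DELETED };

#define F_ASYNC_ACTIVE 0x1u

struct session {
	enum disc_state disc_state;
	unsigned int flags;
};

static bool queue_port_query(struct session *s)
{
	/*
	 * Deletion wins: bail out and drop the in-progress flag so the
	 * deletion path can schedule a fresh relogin.
	 */
	if (s->disc_state == DSC_DELETE_PEND || s->disc_state == DSC_DELETED) {
		s->flags &= ~F_ASYNC_ACTIVE;
		return false;
	}
	s->flags |= F_ASYNC_ACTIVE;
	/* ... issue the async port-database query here ... */
	return true;
}

int main(void)
{
	struct session s = { DSC_DELETE_PEND, F_ASYNC_ACTIVE };

	printf("queued: %d, flags: %#x\n", queue_port_query(&s), s.flags);
	return 0;
}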
The patch below does not apply to the 4.9-, 4.14-, 4.19-, 5.4-, 5.10-, 5.15-, or 5.16-stable trees.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 11 Mar 2022 12:47:33 +1000
Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption
Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a
problem in treclaim where a SLB miss can occur on the
thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13
value, clobbering it with the kernel r13 and ultimately resulting in
kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is
loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss
could occur on ckpt_regs accesses after that, which will result in r13
being clobbered with a kernel value and that will get recheckpointed and
then restored to user registers.
The same memory page is accessed right before this critical window where
a SLB miss could cause corruption, so hitting the bug requires the SLB
entry be removed within a small window of instructions, which is
possible if a SLB related MCE hits there. PAPR also permits the
hypervisor to discard this SLB entry (because slb_shadow->persistent is
only set to SLB_NUM_BOLTED) although it's not known whether any
implementations would do this (KVM does not). So this is an extremely
unlikely bug, only found by inspection.
Fix this by also storing user r13 in a temporary location on the kernel
stack and don't change the r13 register from kernel r13 until the RI=0
critical section that does not fault.
The SCRATCH0 change is not strictly part of the fix, it's only used in
the RI=0 section so it does not have the same problem as the previous
SCRATCH0 bug.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
Cc: stable@vger.kernel.org # v3.9+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3beecc32940b..5a0f023a26e9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -443,7 +443,8 @@ restore_gprs:
REST_GPR(0, r7) /* GPR0 */
REST_GPRS(2, 4, r7) /* GPR2-4 */
- REST_GPRS(8, 31, r7) /* GPR8-31 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -479,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -501,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -519,9 +526,9 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 11 Mar 2022 12:47:33 +1000
Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption
Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a
problem in treclaim where a SLB miss can occur on the
thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13
value, clobbering it with the kernel r13 and ultimately resulting in
kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is
loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss
could occur on ckpt_regs accesses after that, which will result in r13
being clobbered with a kernel value and that will get recheckpointed and
then restored to user registers.
The same memory page is accessed right before this critical window where
a SLB miss could cause corruption, so hitting the bug requires the SLB
entry be removed within a small window of instructions, which is
possible if a SLB related MCE hits there. PAPR also permits the
hypervisor to discard this SLB entry (because slb_shadow->persistent is
only set to SLB_NUM_BOLTED) although it's not known whether any
implementations would do this (KVM does not). So this is an extremely
unlikely bug, only found by inspection.
Fix this by also storing user r13 in a temporary location on the kernel
stack and don't change the r13 register from kernel r13 until the RI=0
critical section that does not fault.
The SCRATCH0 change is not strictly part of the fix, it's only used in
the RI=0 section so it does not have the same problem as the previous
SCRATCH0 bug.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
Cc: stable(a)vger.kernel.org # v3.9+
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Acked-by: Michael Neuling <mikey(a)neuling.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3beecc32940b..5a0f023a26e9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -443,7 +443,8 @@ restore_gprs:
REST_GPR(0, r7) /* GPR0 */
REST_GPRS(2, 4, r7) /* GPR2-4 */
- REST_GPRS(8, 31, r7) /* GPR8-31 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -479,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -501,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -519,9 +526,9 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 11 Mar 2022 12:47:33 +1000
Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption
Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a
problem in treclaim where a SLB miss can occur on the
thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13
value, clobbering it with the kernel r13 and ultimately resulting in
kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is
loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss
could occur on ckpt_regs accesses after that, which will result in r13
being clobbered with a kernel value and that will get recheckpointed and
then restored to user registers.
The same memory page is accessed right before this critical window where
a SLB miss could cause corruption, so hitting the bug requires the SLB
entry be removed within a small window of instructions, which is
possible if a SLB related MCE hits there. PAPR also permits the
hypervisor to discard this SLB entry (because slb_shadow->persistent is
only set to SLB_NUM_BOLTED) although it's not known whether any
implementations would do this (KVM does not). So this is an extremely
unlikely bug, only found by inspection.
Fix this by also storing user r13 in a temporary location on the kernel
stack and don't change the r13 register from kernel r13 until the RI=0
critical section that does not fault.
The SCRATCH0 change is not strictly part of the fix, it's only used in
the RI=0 section so it does not have the same problem as the previous
SCRATCH0 bug.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
Cc: stable(a)vger.kernel.org # v3.9+
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Acked-by: Michael Neuling <mikey(a)neuling.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3beecc32940b..5a0f023a26e9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -443,7 +443,8 @@ restore_gprs:
REST_GPR(0, r7) /* GPR0 */
REST_GPRS(2, 4, r7) /* GPR2-4 */
- REST_GPRS(8, 31, r7) /* GPR8-31 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -479,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -501,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -519,9 +526,9 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 11 Mar 2022 12:47:33 +1000
Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption
Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a
problem in treclaim where a SLB miss can occur on the
thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13
value, clobbering it with the kernel r13 and ultimately resulting in
kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is
loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss
could occur on ckpt_regs accesses after that, which will result in r13
being clobbered with a kernel value and that will get recheckpointed and
then restored to user registers.
The same memory page is accessed right before this critical window where
a SLB miss could cause corruption, so hitting the bug requires the SLB
entry be removed within a small window of instructions, which is
possible if a SLB related MCE hits there. PAPR also permits the
hypervisor to discard this SLB entry (because slb_shadow->persistent is
only set to SLB_NUM_BOLTED) although it's not known whether any
implementations would do this (KVM does not). So this is an extremely
unlikely bug, only found by inspection.
Fix this by also storing user r13 in a temporary location on the kernel
stack and don't change the r13 register from kernel r13 until the RI=0
critical section that does not fault.
The SCRATCH0 change is not strictly part of the fix, it's only used in
the RI=0 section so it does not have the same problem as the previous
SCRATCH0 bug.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
Cc: stable(a)vger.kernel.org # v3.9+
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Acked-by: Michael Neuling <mikey(a)neuling.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3beecc32940b..5a0f023a26e9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -443,7 +443,8 @@ restore_gprs:
REST_GPR(0, r7) /* GPR0 */
REST_GPRS(2, 4, r7) /* GPR2-4 */
- REST_GPRS(8, 31, r7) /* GPR8-31 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -479,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -501,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -519,9 +526,9 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 11 Mar 2022 12:47:33 +1000
Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption
Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a
problem in treclaim where a SLB miss can occur on the
thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13
value, clobbering it with the kernel r13 and ultimately resulting in
kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is
loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss
could occur on ckpt_regs accesses after that, which will result in r13
being clobbered with a kernel value and that will get recheckpointed and
then restored to user registers.
The same memory page is accessed right before this critical window where
a SLB miss could cause corruption, so hitting the bug requires the SLB
entry be removed within a small window of instructions, which is
possible if a SLB related MCE hits there. PAPR also permits the
hypervisor to discard this SLB entry (because slb_shadow->persistent is
only set to SLB_NUM_BOLTED) although it's not known whether any
implementations would do this (KVM does not). So this is an extremely
unlikely bug, only found by inspection.
Fix this by also storing user r13 in a temporary location on the kernel
stack and don't change the r13 register from kernel r13 until the RI=0
critical section that does not fault.
The SCRATCH0 change is not strictly part of the fix, it's only used in
the RI=0 section so it does not have the same problem as the previous
SCRATCH0 bug.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
Cc: stable(a)vger.kernel.org # v3.9+
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Acked-by: Michael Neuling <mikey(a)neuling.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3beecc32940b..5a0f023a26e9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -443,7 +443,8 @@ restore_gprs:
REST_GPR(0, r7) /* GPR0 */
REST_GPRS(2, 4, r7) /* GPR2-4 */
- REST_GPRS(8, 31, r7) /* GPR8-31 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -479,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -501,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -519,9 +526,9 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 11 Mar 2022 12:47:33 +1000
Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption
Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a
problem in treclaim where a SLB miss can occur on the
thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13
value, clobbering it with the kernel r13 and ultimately resulting in
kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is
loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss
could occur on ckpt_regs accesses after that, which will result in r13
being clobbered with a kernel value and that will get recheckpointed and
then restored to user registers.
The same memory page is accessed right before this critical window where
a SLB miss could cause corruption, so hitting the bug requires the SLB
entry be removed within a small window of instructions, which is
possible if a SLB related MCE hits there. PAPR also permits the
hypervisor to discard this SLB entry (because slb_shadow->persistent is
only set to SLB_NUM_BOLTED) although it's not known whether any
implementations would do this (KVM does not). So this is an extremely
unlikely bug, only found by inspection.
Fix this by also storing user r13 in a temporary location on the kernel
stack and don't change the r13 register from kernel r13 until the RI=0
critical section that does not fault.
The SCRATCH0 change is not strictly part of the fix, it's only used in
the RI=0 section so it does not have the same problem as the previous
SCRATCH0 bug.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
Cc: stable(a)vger.kernel.org # v3.9+
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Acked-by: Michael Neuling <mikey(a)neuling.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3beecc32940b..5a0f023a26e9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -443,7 +443,8 @@ restore_gprs:
REST_GPR(0, r7) /* GPR0 */
REST_GPRS(2, 4, r7) /* GPR2-4 */
- REST_GPRS(8, 31, r7) /* GPR8-31 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -479,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -501,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -519,9 +526,9 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
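The shape of the fix deserves a note: everything the RI=0 window needs
is first copied out of memory that can take an SLB fault (ckpt_regs)
into storage that cannot (the redzone below the stack pointer, plus
PACAR1 for the kernel r1), and only then are MSR.RI cleared and r13
switched away from the kernel value. Below is a schematic C rendering of
that ordering with made-up names; the real code is the assembly above.

#include <stdint.h>
#include <stdio.h>

struct ckpt_regs { unsigned long long r1, r5, r13; }; /* may fault */
struct safe_area { unsigned long long r1, r5, r13; }; /* never faults */

static void restore_checkpoint(const struct ckpt_regs *ckpt)
{
        struct safe_area safe;      /* models the redzone below r1 */
        unsigned long long paca_r1; /* models the PACAR1 stash */

        /* Phase 1 (RI=1, faults recoverable): pull everything the
         * no-fault window will need out of the faultable ckpt_regs. */
        safe.r5 = ckpt->r5;
        safe.r13 = ckpt->r13;
        safe.r1 = ckpt->r1;
        paca_r1 = (unsigned long long)(uintptr_t)&safe;

        /* Phase 2 (RI=0, no faults allowed): touch only the safe
         * copies; this models the window around TRECHKPT where an
         * SLB miss on ckpt_regs would corrupt state. */
        printf("trechkpt: r1=%#llx r5=%#llx r13=%#llx (kernel r1 %#llx)\n",
               safe.r1, safe.r5, safe.r13, paca_r1);
}

int main(void)
{
        struct ckpt_regs ckpt = { 0x7fffdead0000ULL, 5, 0x7fffbeef0000ULL };
        restore_checkpoint(&ckpt);
        return 0;
}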
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 11 Mar 2022 12:47:33 +1000
Subject: [PATCH] powerpc/tm: Fix more userspace r13 corruption
Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a
problem in treclaim where an SLB miss can occur on the
thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13
value, clobbering it with the kernel r13 and ultimately resulting in
kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is
loaded into r13 from ckpt_regs to be recheckpointed, but an SLB miss
could occur on later ckpt_regs accesses, clobbering r13 with a kernel
value that then gets recheckpointed and restored to user registers.
The same memory page is accessed right before this critical window where
an SLB miss could cause corruption, so hitting the bug requires the SLB
entry to be removed within a small window of instructions, which is
possible if an SLB-related MCE hits there. PAPR also permits the
hypervisor to discard this SLB entry (because slb_shadow->persistent is
only set to SLB_NUM_BOLTED), although it is not known whether any
implementations would do this (KVM does not). So this is an extremely
unlikely bug, found only by inspection.
Fix this by also storing user r13 in a temporary location on the kernel
stack, and by not switching the r13 register away from kernel r13 until
the RI=0 critical section, which does not fault.
The SCRATCH0 change is not strictly part of the fix; SCRATCH0 is now
only used in the RI=0 section, so it does not have the same problem as
the previous SCRATCH0 bug.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
Cc: stable(a)vger.kernel.org # v3.9+
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Acked-by: Michael Neuling <mikey(a)neuling.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3beecc32940b..5a0f023a26e9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -443,7 +443,8 @@ restore_gprs:
REST_GPR(0, r7) /* GPR0 */
REST_GPRS(2, 4, r7) /* GPR2-4 */
- REST_GPRS(8, 31, r7) /* GPR8-31 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -479,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -501,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -519,9 +526,9 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 83b83a02073ec8d18c77a9bbe0881d710f7a9d32 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 15 Dec 2021 01:15:54 +0000
Subject: [PATCH] KVM: x86/mmu: Use common TDP MMU zap helper for MMU notifier
unmap hook
Use the common TDP MMU zap helper when handling an MMU notifier unmap
event; the two flows are semantically identical. Consolidate the code in
preparation for a future bug fix, as both kvm_tdp_mmu_unmap_gfn_range()
and __kvm_tdp_mmu_zap_gfn_range() are guilty of not zapping SPTEs in
invalid roots.
No functional change intended.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20211215011557.399940-2-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index bc9e3553fba2..b9814e2b397f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1032,13 +1032,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
bool flush)
{
- struct kvm_mmu_page *root;
-
- for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
- flush = zap_gfn_range(kvm, root, range->start, range->end,
- range->may_block, flush, false);
-
- return flush;
+ return __kvm_tdp_mmu_zap_gfn_range(kvm, range->slot->as_id, range->start,
+ range->end, range->may_block, flush);
}
typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter,
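The refactor above follows the usual consolidation pattern: once both
callers funnel through one zap primitive, the follow-up fix (zapping
SPTEs in invalid roots too) only has to land in one place. A stand-alone
sketch of that shape, with stand-in types and names that are not KVM's:

#include <stdbool.h>
#include <stdio.h>

struct gfn_range { unsigned long start, end; bool may_block; };

/* The one shared zap primitive. A later fix here -- e.g. also
 * walking invalid roots -- covers every caller at once. */
static bool zap_gfn_range(struct gfn_range r, bool flush)
{
        printf("zap gfns [%#lx, %#lx)\n", r.start, r.end);
        return true; /* pretend a TLB flush became necessary */
}

/* The MMU-notifier unmap hook collapses to a thin wrapper. */
static bool unmap_gfn_range(struct gfn_range r, bool flush)
{
        return zap_gfn_range(r, flush);
}

int main(void)
{
        struct gfn_range r = { 0x1000, 0x2000, true };
        printf("flush needed: %d\n", unmap_gfn_range(r, false));
        return 0;
}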
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d6174299365ddbbf491620c0b8c5ca1a6ef2eea5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Wed, 9 Feb 2022 04:56:05 -0500
Subject: [PATCH] KVM: x86: Reinitialize context if host userspace toggles
EFER.LME
While the guest runs, EFER.LME cannot change unless CR0.PG is clear, and
therefore EFER.NX is the only bit that can affect the MMU role. However,
set_efer accepts a host-initiated change to EFER.LME even with CR0.PG=1.
In that case, the MMU has to be reset.
Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes")
Cc: stable(a)vger.kernel.org
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 51faa2c76ca5..a5a50cfeffff 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -48,6 +48,7 @@
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
+#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
static __always_inline u64 rsvd_bits(int s, int e)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b95c379e234..b724273493d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1648,8 +1648,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return r;
}
- /* Update reserved bits */
- if ((efer ^ old_efer) & EFER_NX)
+ if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
return 0;
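The heart of the fix is the widened changed-bits test: XOR the old and
new EFER values to find the flipped bits, then mask with the bits that
feed the MMU role. A minimal user-space sketch of that test, with the
EFER bit positions hard-coded from the x86 architectural layout rather
than taken from kernel headers:

#include <stdint.h>
#include <stdio.h>

#define EFER_LME (1ULL << 8)   /* long mode enable */
#define EFER_NX  (1ULL << 11)  /* no-execute enable */
#define MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)

/* Nonzero when an EFER write flips a bit that feeds the MMU role. */
static int efer_needs_mmu_reset(uint64_t old_efer, uint64_t new_efer)
{
        return !!((old_efer ^ new_efer) & MMU_EFER_ROLE_BITS);
}

int main(void)
{
        /* Host-initiated EFER.LME toggle: caught by the new mask,
         * missed by the old EFER_NX-only check. */
        printf("LME toggle resets MMU: %d\n",
               efer_needs_mmu_reset(0, EFER_LME));
        /* An unrelated bit (e.g. SCE, bit 0) does not force a reset. */
        printf("SCE toggle resets MMU: %d\n",
               efer_needs_mmu_reset(0, 1ULL << 0));
        return 0;
}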
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d6174299365ddbbf491620c0b8c5ca1a6ef2eea5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Wed, 9 Feb 2022 04:56:05 -0500
Subject: [PATCH] KVM: x86: Reinitialize context if host userspace toggles
EFER.LME
While the guest runs, EFER.LME cannot change unless CR0.PG is clear, and
therefore EFER.NX is the only bit that can affect the MMU role. However,
set_efer accepts a host-initiated change to EFER.LME even with CR0.PG=1.
In that case, the MMU has to be reset.
Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes")
Cc: stable(a)vger.kernel.org
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 51faa2c76ca5..a5a50cfeffff 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -48,6 +48,7 @@
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
+#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
static __always_inline u64 rsvd_bits(int s, int e)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b95c379e234..b724273493d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1648,8 +1648,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return r;
}
- /* Update reserved bits */
- if ((efer ^ old_efer) & EFER_NX)
+ if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
return 0;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d6174299365ddbbf491620c0b8c5ca1a6ef2eea5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Wed, 9 Feb 2022 04:56:05 -0500
Subject: [PATCH] KVM: x86: Reinitialize context if host userspace toggles
EFER.LME
While the guest runs, EFER.LME cannot change unless CR0.PG is clear, and
therefore EFER.NX is the only bit that can affect the MMU role. However,
set_efer accepts a host-initiated change to EFER.LME even with CR0.PG=1.
In that case, the MMU has to be reset.
Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes")
Cc: stable(a)vger.kernel.org
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 51faa2c76ca5..a5a50cfeffff 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -48,6 +48,7 @@
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
+#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
static __always_inline u64 rsvd_bits(int s, int e)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b95c379e234..b724273493d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1648,8 +1648,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return r;
}
- /* Update reserved bits */
- if ((efer ^ old_efer) & EFER_NX)
+ if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
return 0;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d6174299365ddbbf491620c0b8c5ca1a6ef2eea5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Wed, 9 Feb 2022 04:56:05 -0500
Subject: [PATCH] KVM: x86: Reinitialize context if host userspace toggles
EFER.LME
While the guest runs, EFER.LME cannot change unless CR0.PG is clear, and
therefore EFER.NX is the only bit that can affect the MMU role. However,
set_efer accepts a host-initiated change to EFER.LME even with CR0.PG=1.
In that case, the MMU has to be reset.
Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes")
Cc: stable(a)vger.kernel.org
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 51faa2c76ca5..a5a50cfeffff 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -48,6 +48,7 @@
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
+#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
static __always_inline u64 rsvd_bits(int s, int e)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b95c379e234..b724273493d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1648,8 +1648,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return r;
}
- /* Update reserved bits */
- if ((efer ^ old_efer) & EFER_NX)
+ if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
return 0;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d6174299365ddbbf491620c0b8c5ca1a6ef2eea5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Wed, 9 Feb 2022 04:56:05 -0500
Subject: [PATCH] KVM: x86: Reinitialize context if host userspace toggles
EFER.LME
While the guest runs, EFER.LME cannot change unless CR0.PG is clear, and
therefore EFER.NX is the only bit that can affect the MMU role. However,
set_efer accepts a host-initiated change to EFER.LME even with CR0.PG=1.
In that case, the MMU has to be reset.
Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes")
Cc: stable(a)vger.kernel.org
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 51faa2c76ca5..a5a50cfeffff 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -48,6 +48,7 @@
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
+#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
static __always_inline u64 rsvd_bits(int s, int e)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b95c379e234..b724273493d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1648,8 +1648,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return r;
}
- /* Update reserved bits */
- if ((efer ^ old_efer) & EFER_NX)
+ if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
return 0;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a204f7895878363ca8211f50ec610408c8c70aa Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Date: Thu, 10 Feb 2022 18:08:51 -0600
Subject: [PATCH] KVM: SVM: Allow AVIC support on system w/ physical APIC ID >
255
Expand KVM's mask for the AVIC host physical ID to the full 12 bits defined
by the architecture. The number of bits consumed by hardware is model
specific, e.g. early CPUs ignored bits 11:8, but there is no way for KVM
to enumerate the "true" size. So, KVM must allow using all bits, else it
risks rejecting completely legal x2APIC IDs on newer CPUs.
This means KVM relies on hardware to not assign x2APIC IDs that exceed the
"true" width of the field, but presumably hardware is smart enough to tie
the width to the max x2APIC ID. KVM also relies on hardware to support at
least 8 bits, as the legacy xAPIC ID is writable by software. But, those
assumptions are unavoidable due to the lack of any way to enumerate the
"true" width.
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Fixes: 44a95dae1d22 ("KVM: x86: Detect and Initialize AVIC support")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Message-Id: <20220211000851.185799-1-suravee.suthikulpanit(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bb2fb78523ce..7eb2df5417fb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -226,7 +226,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1afde44b1252..b37b353ec086 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -870,17 +870,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e45b5645d5e0..e37bb3508cfa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
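Note how the mask change also changes the style of the bounds check:
for an unsigned ID, "id > mask" and "(id & ~mask) != 0" are equivalent,
but the latter is the idiomatic form for a widened bit field. A small
stand-alone sketch, with GENMASK_ULL re-derived locally and an
illustrative APIC ID:

#include <stdint.h>
#include <stdio.h>

/* Local re-derivation of the kernel's GENMASK_ULL(h, l). */
#define GENMASK_ULL(h, l) \
        (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define OLD_HOST_ID_MASK 0xFFULL            /* 8-bit field */
#define NEW_HOST_ID_MASK GENMASK_ULL(11, 0) /* full 12-bit field */

int main(void)
{
        uint64_t apic_id = 0x130; /* a physical APIC ID above 255 */

        /* Old check: rejected, although the ID is architecturally legal. */
        printf("old mask rejects %#llx: %d\n",
               (unsigned long long)apic_id, apic_id > OLD_HOST_ID_MASK);
        /* New check: accepted; only bits above bit 11 are out of bounds. */
        printf("new mask rejects %#llx: %d\n",
               (unsigned long long)apic_id,
               (apic_id & ~NEW_HOST_ID_MASK) != 0);
        return 0;
}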
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a204f7895878363ca8211f50ec610408c8c70aa Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Date: Thu, 10 Feb 2022 18:08:51 -0600
Subject: [PATCH] KVM: SVM: Allow AVIC support on system w/ physical APIC ID >
255
Expand KVM's mask for the AVIC host physical ID to the full 12 bits defined
by the architecture. The number of bits consumed by hardware is model
specific, e.g. early CPUs ignored bits 11:8, but there is no way for KVM
to enumerate the "true" size. So, KVM must allow using all bits, else it
risks rejecting completely legal x2APIC IDs on newer CPUs.
This means KVM relies on hardware to not assign x2APIC IDs that exceed the
"true" width of the field, but presumably hardware is smart enough to tie
the width to the max x2APIC ID. KVM also relies on hardware to support at
least 8 bits, as the legacy xAPIC ID is writable by software. But, those
assumptions are unavoidable due to the lack of any way to enumerate the
"true" width.
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Fixes: 44a95dae1d22 ("KVM: x86: Detect and Initialize AVIC support")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Message-Id: <20220211000851.185799-1-suravee.suthikulpanit(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bb2fb78523ce..7eb2df5417fb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -226,7 +226,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1afde44b1252..b37b353ec086 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -870,17 +870,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e45b5645d5e0..e37bb3508cfa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a204f7895878363ca8211f50ec610408c8c70aa Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Date: Thu, 10 Feb 2022 18:08:51 -0600
Subject: [PATCH] KVM: SVM: Allow AVIC support on system w/ physical APIC ID >
255
Expand KVM's mask for the AVIC host physical ID to the full 12 bits defined
by the architecture. The number of bits consumed by hardware is model
specific, e.g. early CPUs ignored bits 11:8, but there is no way for KVM
to enumerate the "true" size. So, KVM must allow using all bits, else it
risks rejecting completely legal x2APIC IDs on newer CPUs.
This means KVM relies on hardware to not assign x2APIC IDs that exceed the
"true" width of the field, but presumably hardware is smart enough to tie
the width to the max x2APIC ID. KVM also relies on hardware to support at
least 8 bits, as the legacy xAPIC ID is writable by software. But, those
assumptions are unavoidable due to the lack of any way to enumerate the
"true" width.
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Fixes: 44a95dae1d22 ("KVM: x86: Detect and Initialize AVIC support")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Message-Id: <20220211000851.185799-1-suravee.suthikulpanit(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bb2fb78523ce..7eb2df5417fb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -226,7 +226,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1afde44b1252..b37b353ec086 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -870,17 +870,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e45b5645d5e0..e37bb3508cfa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a204f7895878363ca8211f50ec610408c8c70aa Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Date: Thu, 10 Feb 2022 18:08:51 -0600
Subject: [PATCH] KVM: SVM: Allow AVIC support on system w/ physical APIC ID >
255
Expand KVM's mask for the AVIC host physical ID to the full 12 bits defined
by the architecture. The number of bits consumed by hardware is model
specific, e.g. early CPUs ignored bits 11:8, but there is no way for KVM
to enumerate the "true" size. So, KVM must allow using all bits, else it
risks rejecting completely legal x2APIC IDs on newer CPUs.
This means KVM relies on hardware to not assign x2APIC IDs that exceed the
"true" width of the field, but presumably hardware is smart enough to tie
the width to the max x2APIC ID. KVM also relies on hardware to support at
least 8 bits, as the legacy xAPIC ID is writable by software. But, those
assumptions are unavoidable due to the lack of any way to enumerate the
"true" width.
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Fixes: 44a95dae1d22 ("KVM: x86: Detect and Initialize AVIC support")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Message-Id: <20220211000851.185799-1-suravee.suthikulpanit(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bb2fb78523ce..7eb2df5417fb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -226,7 +226,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1afde44b1252..b37b353ec086 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -870,17 +870,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e45b5645d5e0..e37bb3508cfa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a204f7895878363ca8211f50ec610408c8c70aa Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Date: Thu, 10 Feb 2022 18:08:51 -0600
Subject: [PATCH] KVM: SVM: Allow AVIC support on system w/ physical APIC ID >
255
Expand KVM's mask for the AVIC host physical ID to the full 12 bits defined
by the architecture. The number of bits consumed by hardware is model
specific, e.g. early CPUs ignored bits 11:8, but there is no way for KVM
to enumerate the "true" size. So, KVM must allow using all bits, else it
risks rejecting completely legal x2APIC IDs on newer CPUs.
This means KVM relies on hardware to not assign x2APIC IDs that exceed the
"true" width of the field, but presumably hardware is smart enough to tie
the width to the max x2APIC ID. KVM also relies on hardware to support at
least 8 bits, as the legacy xAPIC ID is writable by software. But, those
assumptions are unavoidable due to the lack of any way to enumerate the
"true" width.
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Fixes: 44a95dae1d22 ("KVM: x86: Detect and Initialize AVIC support")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Message-Id: <20220211000851.185799-1-suravee.suthikulpanit(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bb2fb78523ce..7eb2df5417fb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -226,7 +226,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1afde44b1252..b37b353ec086 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -870,17 +870,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e45b5645d5e0..e37bb3508cfa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a204f7895878363ca8211f50ec610408c8c70aa Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Date: Thu, 10 Feb 2022 18:08:51 -0600
Subject: [PATCH] KVM: SVM: Allow AVIC support on system w/ physical APIC ID >
255
Expand KVM's mask for the AVIC host physical ID to the full 12 bits defined
by the architecture. The number of bits consumed by hardware is model
specific, e.g. early CPUs ignored bits 11:8, but there is no way for KVM
to enumerate the "true" size. So, KVM must allow using all bits, else it
risks rejecting completely legal x2APIC IDs on newer CPUs.
This means KVM relies on hardware to not assign x2APIC IDs that exceed the
"true" width of the field, but presumably hardware is smart enough to tie
the width to the max x2APIC ID. KVM also relies on hardware to support at
least 8 bits, as the legacy xAPIC ID is writable by software. But, those
assumptions are unavoidable due to the lack of any way to enumerate the
"true" width.
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Fixes: 44a95dae1d22 ("KVM: x86: Detect and Initialize AVIC support")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Message-Id: <20220211000851.185799-1-suravee.suthikulpanit(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bb2fb78523ce..7eb2df5417fb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -226,7 +226,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1afde44b1252..b37b353ec086 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -870,17 +870,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e45b5645d5e0..e37bb3508cfa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a204f7895878363ca8211f50ec610408c8c70aa Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Date: Thu, 10 Feb 2022 18:08:51 -0600
Subject: [PATCH] KVM: SVM: Allow AVIC support on system w/ physical APIC ID >
255
Expand KVM's mask for the AVIC host physical ID to the full 12 bits defined
by the architecture. The number of bits consumed by hardware is model
specific, e.g. early CPUs ignored bits 11:8, but there is no way for KVM
to enumerate the "true" size. So, KVM must allow using all bits, else it
risks rejecting completely legal x2APIC IDs on newer CPUs.
This means KVM relies on hardware to not assign x2APIC IDs that exceed the
"true" width of the field, but presumably hardware is smart enough to tie
the width to the max x2APIC ID. KVM also relies on hardware to support at
least 8 bits, as the legacy xAPIC ID is writable by software. But, those
assumptions are unavoidable due to the lack of any way to enumerate the
"true" width.
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Fixes: 44a95dae1d22 ("KVM: x86: Detect and Initialize AVIC support")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Message-Id: <20220211000851.185799-1-suravee.suthikulpanit(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bb2fb78523ce..7eb2df5417fb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -226,7 +226,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1afde44b1252..b37b353ec086 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -870,17 +870,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e45b5645d5e0..e37bb3508cfa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -569,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24a644ebbfd3b13cda702f98907f9dd123e34bf9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Thu, 10 Feb 2022 12:36:42 +0200
Subject: [PATCH] drm/i915/opregion: check port number bounds for SWSCI display
power state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The mapping from enum port to whatever port numbering scheme is used by
the SWSCI Display Power State Notification is odd, and the memory of it
has faded. In any case, the parameter only has space for ports numbered
[0..4], and UBSAN reports bit shift beyond it when the platform has port
F or more.
Since the SWSCI functionality is supposed to be obsolete for new
platforms (i.e. ones that might have port F or more), just bail out
early if the mapped and mangled port number is beyond what the Display
Power State Notification can support.
Fixes: 9c4b0a683193 ("drm/i915: add opregion function to notify bios of encoder enable/disable")
Cc: <stable(a)vger.kernel.org> # v3.13+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4800
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/cc363f42d6b5a5932b6d218fefcc8…
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index af9d30f56cc1..ad1afe9df6c3 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -363,6 +363,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
port++;
}
+ /*
+ * The port numbering and mapping here is bizarre. The now-obsolete
+ * swsci spec supports ports numbered [0..4]. Port E is handled as a
+ * special case, but port F and beyond are not. The functionality is
+ * supposed to be obsolete for new platforms. Just bail out if the port
+ * number is out of bounds after mapping.
+ */
+ if (port > 4) {
+ drm_dbg_kms(&dev_priv->drm,
+ "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n",
+ intel_encoder->base.base.id, intel_encoder->base.name,
+ port_name(intel_encoder->port), port);
+ return -EINVAL;
+ }
+
if (!enable)
parm |= 4 << 8;
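The pattern in the fix -- validate the mapped index before using it as
a shift amount or field selector -- is easy to demonstrate in
isolation. A hypothetical sketch (the 4-bits-per-port packing below is
invented for illustration; the real SWSCI parameter layout is not shown
in the patch):

#include <stdint.h>
#include <stdio.h>

#define MAX_SWSCI_PORT 4 /* the notification has slots for ports 0..4 only */

/* Hypothetical packing: a 4-bit state per port inside a u32. Ports
 * above 4 would shift state into bits the field does not reserve
 * (and, with the real layout's wider shifts, past the end of the
 * parameter, which is what UBSAN flagged), so bail out first. */
static int encode_port_state(unsigned int port, unsigned int state,
                             uint32_t *parm)
{
        if (port > MAX_SWSCI_PORT)
                return -1; /* mirrors the -EINVAL bail-out in the fix */
        *parm |= (state & 0xfu) << (port * 4);
        return 0;
}

int main(void)
{
        uint32_t parm = 0;
        printf("port 3: %s\n", encode_port_state(3, 1, &parm) ? "rejected" : "ok");
        printf("port 5: %s\n", encode_port_state(5, 1, &parm) ? "rejected" : "ok");
        printf("parm = %#x\n", parm);
        return 0;
}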
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24a644ebbfd3b13cda702f98907f9dd123e34bf9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Thu, 10 Feb 2022 12:36:42 +0200
Subject: [PATCH] drm/i915/opregion: check port number bounds for SWSCI display
power state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The mapping from enum port to whatever port numbering scheme is used by
the SWSCI Display Power State Notification is odd, and the memory of it
has faded. In any case, the parameter only has space for ports numbered
[0..4], and UBSAN reports bit shift beyond it when the platform has port
F or more.
Since the SWSCI functionality is supposed to be obsolete for new
platforms (i.e. ones that might have port F or more), just bail out
early if the mapped and mangled port number is beyond what the Display
Power State Notification can support.
Fixes: 9c4b0a683193 ("drm/i915: add opregion function to notify bios of encoder enable/disable")
Cc: <stable(a)vger.kernel.org> # v3.13+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4800
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/cc363f42d6b5a5932b6d218fefcc8…
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index af9d30f56cc1..ad1afe9df6c3 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -363,6 +363,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
port++;
}
+ /*
+ * The port numbering and mapping here is bizarre. The now-obsolete
+ * swsci spec supports ports numbered [0..4]. Port E is handled as a
+ * special case, but port F and beyond are not. The functionality is
+ * supposed to be obsolete for new platforms. Just bail out if the port
+ * number is out of bounds after mapping.
+ */
+ if (port > 4) {
+ drm_dbg_kms(&dev_priv->drm,
+ "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n",
+ intel_encoder->base.base.id, intel_encoder->base.name,
+ port_name(intel_encoder->port), port);
+ return -EINVAL;
+ }
+
if (!enable)
parm |= 4 << 8;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24a644ebbfd3b13cda702f98907f9dd123e34bf9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Thu, 10 Feb 2022 12:36:42 +0200
Subject: [PATCH] drm/i915/opregion: check port number bounds for SWSCI display
power state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The mapping from enum port to whatever port numbering scheme is used by
the SWSCI Display Power State Notification is odd, and the memory of it
has faded. In any case, the parameter only has space for ports numbered
[0..4], and UBSAN reports bit shift beyond it when the platform has port
F or more.
Since the SWSCI functionality is supposed to be obsolete for new
platforms (i.e. ones that might have port F or more), just bail out
early if the mapped and mangled port number is beyond what the Display
Power State Notification can support.
Fixes: 9c4b0a683193 ("drm/i915: add opregion function to notify bios of encoder enable/disable")
Cc: <stable(a)vger.kernel.org> # v3.13+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4800
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/cc363f42d6b5a5932b6d218fefcc8…
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index af9d30f56cc1..ad1afe9df6c3 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -363,6 +363,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
port++;
}
+ /*
+ * The port numbering and mapping here is bizarre. The now-obsolete
+ * swsci spec supports ports numbered [0..4]. Port E is handled as a
+ * special case, but port F and beyond are not. The functionality is
+ * supposed to be obsolete for new platforms. Just bail out if the port
+ * number is out of bounds after mapping.
+ */
+ if (port > 4) {
+ drm_dbg_kms(&dev_priv->drm,
+ "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n",
+ intel_encoder->base.base.id, intel_encoder->base.name,
+ port_name(intel_encoder->port), port);
+ return -EINVAL;
+ }
+
if (!enable)
parm |= 4 << 8;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24a644ebbfd3b13cda702f98907f9dd123e34bf9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Thu, 10 Feb 2022 12:36:42 +0200
Subject: [PATCH] drm/i915/opregion: check port number bounds for SWSCI display
power state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The mapping from enum port to whatever port numbering scheme is used by
the SWSCI Display Power State Notification is odd, and the memory of it
has faded. In any case, the parameter only has space for ports numbered
[0..4], and UBSAN reports bit shift beyond it when the platform has port
F or more.
Since the SWSCI functionality is supposed to be obsolete for new
platforms (i.e. ones that might have port F or more), just bail out
early if the mapped and mangled port number is beyond what the Display
Power State Notification can support.
Fixes: 9c4b0a683193 ("drm/i915: add opregion function to notify bios of encoder enable/disable")
Cc: <stable(a)vger.kernel.org> # v3.13+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4800
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/cc363f42d6b5a5932b6d218fefcc8…
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index af9d30f56cc1..ad1afe9df6c3 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -363,6 +363,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
port++;
}
+ /*
+ * The port numbering and mapping here is bizarre. The now-obsolete
+ * swsci spec supports ports numbered [0..4]. Port E is handled as a
+ * special case, but port F and beyond are not. The functionality is
+ * supposed to be obsolete for new platforms. Just bail out if the port
+ * number is out of bounds after mapping.
+ */
+ if (port > 4) {
+ drm_dbg_kms(&dev_priv->drm,
+ "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n",
+ intel_encoder->base.base.id, intel_encoder->base.name,
+ port_name(intel_encoder->port), port);
+ return -EINVAL;
+ }
+
if (!enable)
parm |= 4 << 8;
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32370191c0851da069d242f581cbe2fdb80040cb Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello(a)amd.com>
Date: Wed, 23 Feb 2022 11:52:37 -0600
Subject: [PATCH] platform/x86: amd-pmc: Set QOS during suspend on CZN w/ timer
wakeup
commit 59348401ebed ("platform/x86: amd-pmc: Add special handling for
timer based S0i3 wakeup") adds support for using another platform timer
in lieu of the RTC, which doesn't work properly on some systems. This path
was validated and worked well before submission. During the 5.16-rc1 merge
window, other patches were merged that caused this to stop working properly.
When this feature was used with 5.16-rc1 or later, some OEM laptops with the
matching firmware requirements from that commit would shut down instead of
programming a timer-based wakeup.
This was bisected to commit 8d89835b0467 ("PM: suspend: Do not pause
cpuidle in the suspend-to-idle path"). This wasn't supposed to cause any
negative impact, and it also tested well on both Intel and ARM platforms.
However, it changed the semantics of when CPUs are allowed to be in the
deepest state. For the AMD systems in question, it appears this causes a
firmware crash for timer-based wakeup.
It's hypothesized to be caused by the `amd-pmc` driver sending `OS_HINT`
and all the CPUs going into a deep state while the timer is still being
programmed. It's likely a firmware bug, but to avoid it, don't allow
CPUs to enter the deepest idle state while the CZN timer wakeup path is
in use.
If it's later discovered that this also occurs for "regular" suspends
without a timer, or on other silicon, this may be expanded to run in the
suspend path for more scenarios.
Cc: stable(a)vger.kernel.org # 5.16+
Suggested-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Link: https://lore.kernel.org/linux-acpi/BL1PR12MB51570F5BD05980A0DCA1F3F4E23A9@B…
Fixes: 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path")
Fixes: 23f62d7ab25b ("PM: sleep: Pause cpuidle later and resume it earlier during system transitions")
Fixes: 59348401ebed ("platform/x86: amd-pmc: Add special handling for timer based S0i3 wakeup")
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://lore.kernel.org/r/20220223175237.6209-1-mario.limonciello@amd.com
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 69fdbb0d9f45..425a86108f75 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
#include <linux/rtc.h>
#include <linux/suspend.h>
#include <linux/seq_file.h>
@@ -95,6 +96,9 @@
#define PMC_MSG_DELAY_MIN_US 50
#define RESPONSE_REGISTER_LOOP_MAX 20000
+/* QoS request for letting CPUs in idle states, but not the deepest */
+#define AMD_PMC_MAX_IDLE_STATE_LATENCY 3
+
#define SOC_SUBSYSTEM_IP_MAX 12
#define DELAY_MIN_US 2000
#define DELAY_MAX_US 3000
@@ -149,6 +153,7 @@ struct amd_pmc_dev {
struct device *dev;
struct pci_dev *rdev;
struct mutex lock; /* generic mutex lock */
+ struct pm_qos_request amd_pmc_pm_qos_req;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbgfs_dir;
#endif /* CONFIG_DEBUG_FS */
@@ -603,6 +608,14 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
rc = rtc_alarm_irq_enable(rtc_device, 0);
dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration);
+ /*
+ * Prevent CPUs from getting into deep idle states while sending OS_HINT
+ * which is otherwise generally safe to send when at least one of the CPUs
+ * is not in deep idle states.
+ */
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, AMD_PMC_MAX_IDLE_STATE_LATENCY);
+ wake_up_all_idle_cpus();
+
return rc;
}
@@ -620,24 +633,31 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
/* Activate CZN specific RTC functionality */
if (pdev->cpu_id == AMD_CPU_ID_CZN) {
rc = amd_pmc_verify_czn_rtc(pdev, &arg);
- if (rc < 0)
- return rc;
+ if (rc)
+ goto fail;
}
/* Dump the IdleMask before we send hint to SMU */
amd_pmc_idlemask_read(pdev, dev, NULL);
msg = amd_pmc_get_os_hint(pdev);
rc = amd_pmc_send_cmd(pdev, arg, NULL, msg, 0);
- if (rc)
+ if (rc) {
dev_err(pdev->dev, "suspend failed\n");
+ goto fail;
+ }
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF);
- if (rc) {
+ if (rc) {
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
+ goto fail;
}
+ return 0;
+fail:
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
return rc;
}
@@ -661,12 +681,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
/* Write data incremented by 1 to distinguish in stb_read */
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1);
- if (rc) {
+ if (rc)
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
- }
- return 0;
+ /* Restore the QoS request back to defaults if it was set */
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
+
+ return rc;
}
static const struct dev_pm_ops amd_pmc_pm_ops = {
@@ -838,6 +861,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
amd_pmc_get_smu_version(dev);
platform_set_drvdata(pdev, dev);
amd_pmc_dbgfs_register(dev);
+ cpu_latency_qos_add_request(&dev->amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE);
return 0;
err_pci_dev_put:
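The fix rests on the CPU latency QoS API: a request registered at probe
time with PM_QOS_DEFAULT_VALUE is inert until it is tightened, and
updating it back restores normal idle behavior. A condensed sketch of
that add/tighten/restore lifecycle, assuming a simplified driver
context (the demo_* names are illustrative, not amd-pmc's):

#include <linux/cpu.h>
#include <linux/pm_qos.h>

struct demo_dev {
	struct pm_qos_request qos_req;
};

/* Probe: register an inactive request (default value = no constraint). */
static void demo_qos_init(struct demo_dev *d)
{
	cpu_latency_qos_add_request(&d->qos_req, PM_QOS_DEFAULT_VALUE);
}

/*
 * Suspend path: cap CPU exit latency at 3 us so the deepest idle
 * states are off-limits, then kick idle CPUs so the constraint is
 * observed before OS_HINT is sent.
 */
static void demo_qos_block_deep_idle(struct demo_dev *d)
{
	cpu_latency_qos_update_request(&d->qos_req, 3);
	wake_up_all_idle_cpus();
}

/* Resume, or any failed suspend: drop the constraint again. */
static void demo_qos_restore(struct demo_dev *d)
{
	cpu_latency_qos_update_request(&d->qos_req, PM_QOS_DEFAULT_VALUE);
}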
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32370191c0851da069d242f581cbe2fdb80040cb Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello(a)amd.com>
Date: Wed, 23 Feb 2022 11:52:37 -0600
Subject: [PATCH] platform/x86: amd-pmc: Set QOS during suspend on CZN w/ timer
wakeup
commit 59348401ebed ("platform/x86: amd-pmc: Add special handling for
timer based S0i3 wakeup") adds support for using another platform timer
in lieu of the RTC, which doesn't work properly on some systems. This
path was validated and worked well before submission. During the
5.16-rc1 merge window other patches were merged that caused this to stop
working properly.
When this feature was used with 5.16-rc1 or later, some OEM laptops with
the matching firmware requirements from that commit would shut down
instead of programming a timer-based wakeup.
This was bisected to commit 8d89835b0467 ("PM: suspend: Do not pause
cpuidle in the suspend-to-idle path"), which wasn't supposed to cause
any negative impact and also tested well on both Intel and ARM
platforms. However, it changed the semantics of when CPUs are allowed to
be in the deepest idle state. For the AMD systems in question, this
appears to cause a firmware crash on timer-based wakeup.
The hypothesis is that the `amd-pmc` driver sends `OS_HINT` and all the
CPUs then enter a deep state while the timer is still being programmed.
It's likely a firmware bug, but to avoid it, don't allow putting CPUs
into the deepest state while using the CZN timer wakeup path.
If it's later discovered that this also occurs on "regular" suspends
without a timer, or on other silicon, this may be expanded to run in the
suspend path for more scenarios.
Cc: stable(a)vger.kernel.org # 5.16+
Suggested-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Link: https://lore.kernel.org/linux-acpi/BL1PR12MB51570F5BD05980A0DCA1F3F4E23A9@B…
Fixes: 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path")
Fixes: 23f62d7ab25b ("PM: sleep: Pause cpuidle later and resume it earlier during system transitions")
Fixes: 59348401ebed ("platform/x86: amd-pmc: Add special handling for timer based S0i3 wakeup")
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://lore.kernel.org/r/20220223175237.6209-1-mario.limonciello@amd.com
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 69fdbb0d9f45..425a86108f75 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
#include <linux/rtc.h>
#include <linux/suspend.h>
#include <linux/seq_file.h>
@@ -95,6 +96,9 @@
#define PMC_MSG_DELAY_MIN_US 50
#define RESPONSE_REGISTER_LOOP_MAX 20000
+/* QoS request for letting CPUs in idle states, but not the deepest */
+#define AMD_PMC_MAX_IDLE_STATE_LATENCY 3
+
#define SOC_SUBSYSTEM_IP_MAX 12
#define DELAY_MIN_US 2000
#define DELAY_MAX_US 3000
@@ -149,6 +153,7 @@ struct amd_pmc_dev {
struct device *dev;
struct pci_dev *rdev;
struct mutex lock; /* generic mutex lock */
+ struct pm_qos_request amd_pmc_pm_qos_req;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbgfs_dir;
#endif /* CONFIG_DEBUG_FS */
@@ -603,6 +608,14 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
rc = rtc_alarm_irq_enable(rtc_device, 0);
dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration);
+ /*
+ * Prevent CPUs from getting into deep idle states while sending OS_HINT
+ * which is otherwise generally safe to send when at least one of the CPUs
+ * is not in deep idle states.
+ */
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, AMD_PMC_MAX_IDLE_STATE_LATENCY);
+ wake_up_all_idle_cpus();
+
return rc;
}
@@ -620,24 +633,31 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
/* Activate CZN specific RTC functionality */
if (pdev->cpu_id == AMD_CPU_ID_CZN) {
rc = amd_pmc_verify_czn_rtc(pdev, &arg);
- if (rc < 0)
- return rc;
+ if (rc)
+ goto fail;
}
/* Dump the IdleMask before we send hint to SMU */
amd_pmc_idlemask_read(pdev, dev, NULL);
msg = amd_pmc_get_os_hint(pdev);
rc = amd_pmc_send_cmd(pdev, arg, NULL, msg, 0);
- if (rc)
+ if (rc) {
dev_err(pdev->dev, "suspend failed\n");
+ goto fail;
+ }
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF);
- if (rc) {
+ if (rc) {
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
+ goto fail;
}
+ return 0;
+fail:
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
return rc;
}
@@ -661,12 +681,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
/* Write data incremented by 1 to distinguish in stb_read */
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1);
- if (rc) {
+ if (rc)
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
- }
- return 0;
+ /* Restore the QoS request back to defaults if it was set */
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
+
+ return rc;
}
static const struct dev_pm_ops amd_pmc_pm_ops = {
@@ -838,6 +861,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
amd_pmc_get_smu_version(dev);
platform_set_drvdata(pdev, dev);
amd_pmc_dbgfs_register(dev);
+ cpu_latency_qos_add_request(&dev->amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE);
return 0;
err_pci_dev_put:
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b50255c83b914defd61a57fbc81d452334b63f4c Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan(a)marcan.st>
Date: Tue, 1 Feb 2022 01:07:10 +0900
Subject: [PATCH] brcmfmac: pcie: Fix crashes due to early IRQs
The driver was enabling IRQs before the message processing was
initialized. This could cause IRQs to come in too early and crash the
driver. Instead, move the IRQ enable and hostready to a bus preinit
function, at which point everything is properly initialized.
Fixes: 9e37f045d5e7 ("brcmfmac: Adding PCIe bus layer support.")
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Reviewed-by: Arend van Spriel <arend.vanspriel(a)broadcom.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hector Martin <marcan(a)marcan.st>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Kalle Valo <kvalo(a)kernel.org>
Link: https://lore.kernel.org/r/20220131160713.245637-7-marcan@marcan.st
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 3f3ca7612bcd..55f0111283c9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1315,6 +1315,18 @@ static void brcmf_pcie_down(struct device *dev)
{
}
+static int brcmf_pcie_preinit(struct device *dev)
+{
+ struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+ struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie;
+
+ brcmf_dbg(PCIE, "Enter\n");
+
+ brcmf_pcie_intr_enable(buspub->devinfo);
+ brcmf_pcie_hostready(buspub->devinfo);
+
+ return 0;
+}
static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb)
{
@@ -1423,6 +1435,7 @@ static int brcmf_pcie_reset(struct device *dev)
}
static const struct brcmf_bus_ops brcmf_pcie_bus_ops = {
+ .preinit = brcmf_pcie_preinit,
.txdata = brcmf_pcie_tx,
.stop = brcmf_pcie_down,
.txctl = brcmf_pcie_tx_ctlpkt,
@@ -1795,9 +1808,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
init_waitqueue_head(&devinfo->mbdata_resp_wait);
- brcmf_pcie_intr_enable(devinfo);
- brcmf_pcie_hostready(devinfo);
-
ret = brcmf_attach(&devinfo->pdev->dev);
if (ret)
goto fail;
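The bug is purely one of ordering: interrupts were unmasked while the
message-processing state they feed was still uninitialized. A small
userspace sketch of why moving the enable into a post-initialization
preinit hook closes the window (all names are illustrative, not
brcmfmac's):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative device state, not brcmfmac's structures. */
struct demo_dev {
	bool msgbuf_ready;	/* message processing initialized? */
	bool irq_enabled;
};

/* Simulated IRQ handler: touching message state before it exists is
 * exactly the crash the patch avoids. */
static void demo_irq(struct demo_dev *d)
{
	if (!d->msgbuf_ready) {
		fprintf(stderr, "early IRQ: would crash\n");
		return;
	}
	puts("IRQ handled");
}

/* preinit hook: runs only after attach has finished initializing,
 * so this is the earliest safe point to enable interrupts. */
static int demo_preinit(struct demo_dev *d)
{
	d->irq_enabled = true;
	return 0;
}

int main(void)
{
	struct demo_dev dev = { 0 };

	dev.msgbuf_ready = true;	/* attach(): set up message rings */
	demo_preinit(&dev);		/* then, and only then, enable IRQs */
	if (dev.irq_enabled)
		demo_irq(&dev);
	return 0;
}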
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b50255c83b914defd61a57fbc81d452334b63f4c Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan(a)marcan.st>
Date: Tue, 1 Feb 2022 01:07:10 +0900
Subject: [PATCH] brcmfmac: pcie: Fix crashes due to early IRQs
The driver was enabling IRQs before the message processing was
initialized. This could cause IRQs to come in too early and crash the
driver. Instead, move the IRQ enable and hostready to a bus preinit
function, at which point everything is properly initialized.
Fixes: 9e37f045d5e7 ("brcmfmac: Adding PCIe bus layer support.")
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Reviewed-by: Arend van Spriel <arend.vanspriel(a)broadcom.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hector Martin <marcan(a)marcan.st>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Kalle Valo <kvalo(a)kernel.org>
Link: https://lore.kernel.org/r/20220131160713.245637-7-marcan@marcan.st
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 3f3ca7612bcd..55f0111283c9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1315,6 +1315,18 @@ static void brcmf_pcie_down(struct device *dev)
{
}
+static int brcmf_pcie_preinit(struct device *dev)
+{
+ struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+ struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie;
+
+ brcmf_dbg(PCIE, "Enter\n");
+
+ brcmf_pcie_intr_enable(buspub->devinfo);
+ brcmf_pcie_hostready(buspub->devinfo);
+
+ return 0;
+}
static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb)
{
@@ -1423,6 +1435,7 @@ static int brcmf_pcie_reset(struct device *dev)
}
static const struct brcmf_bus_ops brcmf_pcie_bus_ops = {
+ .preinit = brcmf_pcie_preinit,
.txdata = brcmf_pcie_tx,
.stop = brcmf_pcie_down,
.txctl = brcmf_pcie_tx_ctlpkt,
@@ -1795,9 +1808,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
init_waitqueue_head(&devinfo->mbdata_resp_wait);
- brcmf_pcie_intr_enable(devinfo);
- brcmf_pcie_hostready(devinfo);
-
ret = brcmf_attach(&devinfo->pdev->dev);
if (ret)
goto fail;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b50255c83b914defd61a57fbc81d452334b63f4c Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan(a)marcan.st>
Date: Tue, 1 Feb 2022 01:07:10 +0900
Subject: [PATCH] brcmfmac: pcie: Fix crashes due to early IRQs
The driver was enabling IRQs before the message processing was
initialized. This could cause IRQs to come in too early and crash the
driver. Instead, move the IRQ enable and hostready to a bus preinit
function, at which point everything is properly initialized.
Fixes: 9e37f045d5e7 ("brcmfmac: Adding PCIe bus layer support.")
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Reviewed-by: Arend van Spriel <arend.vanspriel(a)broadcom.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hector Martin <marcan(a)marcan.st>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Kalle Valo <kvalo(a)kernel.org>
Link: https://lore.kernel.org/r/20220131160713.245637-7-marcan@marcan.st
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 3f3ca7612bcd..55f0111283c9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1315,6 +1315,18 @@ static void brcmf_pcie_down(struct device *dev)
{
}
+static int brcmf_pcie_preinit(struct device *dev)
+{
+ struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+ struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie;
+
+ brcmf_dbg(PCIE, "Enter\n");
+
+ brcmf_pcie_intr_enable(buspub->devinfo);
+ brcmf_pcie_hostready(buspub->devinfo);
+
+ return 0;
+}
static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb)
{
@@ -1423,6 +1435,7 @@ static int brcmf_pcie_reset(struct device *dev)
}
static const struct brcmf_bus_ops brcmf_pcie_bus_ops = {
+ .preinit = brcmf_pcie_preinit,
.txdata = brcmf_pcie_tx,
.stop = brcmf_pcie_down,
.txctl = brcmf_pcie_tx_ctlpkt,
@@ -1795,9 +1808,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
init_waitqueue_head(&devinfo->mbdata_resp_wait);
- brcmf_pcie_intr_enable(devinfo);
- brcmf_pcie_hostready(devinfo);
-
ret = brcmf_attach(&devinfo->pdev->dev);
if (ret)
goto fail;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cd9f7f7ac5932129fe81b4c7559cfcb226ec7c5c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Tue, 1 Feb 2022 12:53:05 +0100
Subject: [PATCH] drm/fb-helper: Mark screen buffers in system memory with
FBINFO_VIRTFB
Mark screen buffers in system memory with FBINFO_VIRTFB. Otherwise, fbdev
deferred I/O marks mmap'ed areas of system memory with VM_IO. (There's an
inverse relationship between the two flags.)
For shadow buffers, also set the FBINFO_READS_FAST hint.
v3:
* change FB_ to FBINFO_ in commit description
v2:
* updated commit description (Daniel)
* added Fixes tag
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: d536540f304c ("drm/fb-helper: Add generic fbdev emulation .fb_probe function")
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.19+
Link: https://patchwork.freedesktop.org/patch/msgid/20220201115305.9333-1-tzimmer…
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 9727a59d35fd..805c5a666490 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2340,6 +2340,7 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
fbi->fbops = &drm_fbdev_fb_ops;
fbi->screen_size = sizes->surface_height * fb->pitches[0];
fbi->fix.smem_len = fbi->screen_size;
+ fbi->flags = FBINFO_DEFAULT;
drm_fb_helper_fill_info(fbi, fb_helper, sizes);
@@ -2347,19 +2348,21 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
fbi->screen_buffer = vzalloc(fbi->screen_size);
if (!fbi->screen_buffer)
return -ENOMEM;
+ fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
fbi->fbdefio = &drm_fbdev_defio;
-
fb_deferred_io_init(fbi);
} else {
/* buffer is mapped for HW framebuffer */
ret = drm_client_buffer_vmap(fb_helper->buffer, &map);
if (ret)
return ret;
- if (map.is_iomem)
+ if (map.is_iomem) {
fbi->screen_base = map.vaddr_iomem;
- else
+ } else {
fbi->screen_buffer = map.vaddr;
+ fbi->flags |= FBINFO_VIRTFB;
+ }
/*
* Shamelessly leak the physical address to user-space. As
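The flag choice matters because fbdev deferred I/O decides how to map
the buffer based on FBINFO_VIRTFB: system-memory buffers must not end
up in VM_IO mappings. A condensed sketch of the selection logic the
patch implements, assuming a simplified probe helper (demo_* is
illustrative, not the drm_fb_helper code):

#include <linux/fb.h>
#include <linux/types.h>

/*
 * Pick fbdev info flags for a screen buffer. A vzalloc'ed shadow
 * buffer lives in system memory and is cheap to read back, so it
 * gets FBINFO_VIRTFB plus the FBINFO_READS_FAST hint; a vmapped
 * client buffer gets FBINFO_VIRTFB only when it is not I/O memory.
 */
static void demo_set_fb_flags(struct fb_info *fbi, bool shadow, bool iomem)
{
	fbi->flags = FBINFO_DEFAULT;

	if (shadow)
		fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
	else if (!iomem)
		fbi->flags |= FBINFO_VIRTFB;
}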
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cd9f7f7ac5932129fe81b4c7559cfcb226ec7c5c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Tue, 1 Feb 2022 12:53:05 +0100
Subject: [PATCH] drm/fb-helper: Mark screen buffers in system memory with
FBINFO_VIRTFB
Mark screen buffers in system memory with FBINFO_VIRTFB. Otherwise, fbdev
deferred I/O marks mmap'ed areas of system memory with VM_IO. (There's an
inverse relationship between the two flags.)
For shadow buffers, also set the FBINFO_READS_FAST hint.
v3:
* change FB_ to FBINFO_ in commit description
v2:
* updated commit description (Daniel)
* added Fixes tag
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: d536540f304c ("drm/fb-helper: Add generic fbdev emulation .fb_probe function")
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.19+
Link: https://patchwork.freedesktop.org/patch/msgid/20220201115305.9333-1-tzimmer…
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 9727a59d35fd..805c5a666490 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2340,6 +2340,7 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
fbi->fbops = &drm_fbdev_fb_ops;
fbi->screen_size = sizes->surface_height * fb->pitches[0];
fbi->fix.smem_len = fbi->screen_size;
+ fbi->flags = FBINFO_DEFAULT;
drm_fb_helper_fill_info(fbi, fb_helper, sizes);
@@ -2347,19 +2348,21 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
fbi->screen_buffer = vzalloc(fbi->screen_size);
if (!fbi->screen_buffer)
return -ENOMEM;
+ fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
fbi->fbdefio = &drm_fbdev_defio;
-
fb_deferred_io_init(fbi);
} else {
/* buffer is mapped for HW framebuffer */
ret = drm_client_buffer_vmap(fb_helper->buffer, &map);
if (ret)
return ret;
- if (map.is_iomem)
+ if (map.is_iomem) {
fbi->screen_base = map.vaddr_iomem;
- else
+ } else {
fbi->screen_buffer = map.vaddr;
+ fbi->flags |= FBINFO_VIRTFB;
+ }
/*
* Shamelessly leak the physical address to user-space. As
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cd9f7f7ac5932129fe81b4c7559cfcb226ec7c5c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann(a)suse.de>
Date: Tue, 1 Feb 2022 12:53:05 +0100
Subject: [PATCH] drm/fb-helper: Mark screen buffers in system memory with
FBINFO_VIRTFB
Mark screen buffers in system memory with FBINFO_VIRTFB. Otherwise, fbdev
deferred I/O marks mmap'ed areas of system memory with VM_IO. (There's an
inverse relationship between the two flags.)
For shadow buffers, also set the FBINFO_READS_FAST hint.
v3:
* change FB_ to FBINFO_ in commit description
v2:
* updated commit description (Daniel)
* added Fixes tag
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: d536540f304c ("drm/fb-helper: Add generic fbdev emulation .fb_probe function")
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.19+
Link: https://patchwork.freedesktop.org/patch/msgid/20220201115305.9333-1-tzimmer…
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 9727a59d35fd..805c5a666490 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2340,6 +2340,7 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
fbi->fbops = &drm_fbdev_fb_ops;
fbi->screen_size = sizes->surface_height * fb->pitches[0];
fbi->fix.smem_len = fbi->screen_size;
+ fbi->flags = FBINFO_DEFAULT;
drm_fb_helper_fill_info(fbi, fb_helper, sizes);
@@ -2347,19 +2348,21 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
fbi->screen_buffer = vzalloc(fbi->screen_size);
if (!fbi->screen_buffer)
return -ENOMEM;
+ fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
fbi->fbdefio = &drm_fbdev_defio;
-
fb_deferred_io_init(fbi);
} else {
/* buffer is mapped for HW framebuffer */
ret = drm_client_buffer_vmap(fb_helper->buffer, &map);
if (ret)
return ret;
- if (map.is_iomem)
+ if (map.is_iomem) {
fbi->screen_base = map.vaddr_iomem;
- else
+ } else {
fbi->screen_buffer = map.vaddr;
+ fbi->flags |= FBINFO_VIRTFB;
+ }
/*
* Shamelessly leak the physical address to user-space. As
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c0573ba5c5a2244dc02060b1f374d4593c1d20b7 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Date: Tue, 1 Mar 2022 15:33:40 +0100
Subject: [PATCH] KVM: s390x: fix SCK locking
When handling the SCK instruction, the kvm lock is taken, even though
the vcpu lock is already being held. The normal locking order is kvm
lock first and then vcpu lock. This can (and in some circumstances
does) lead to deadlocks.
The function kvm_s390_set_tod_clock is called both by the SCK handler
and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu
lock, so they can safely take the kvm lock. The SCK handler holds the
vcpu lock, but will also somehow need to acquire the kvm lock without
relinquishing the vcpu lock.
The solution is to factor out the code to set the clock, and provide
two wrappers. One is called like the original function and does the
locking, the other is called kvm_s390_try_set_tod_clock and uses
trylock to try to acquire the kvm lock. This new wrapper is then used
in the SCK handler. If locking fails, -EAGAIN is returned, which is
eventually propagated to userspace, thus also freeing the vcpu lock and
allowing for forward progress.
This is not the most efficient or elegant way to solve this issue, but
the SCK instruction is deprecated and its performance is not critical.
The goal of this patch is just to provide a simple but correct way to
fix the bug.
Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction")
Signed-off-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b5ea95cf8686..b53ff693b66e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3961,14 +3961,12 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0;
}
-void kvm_s390_set_tod_clock(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod)
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
struct kvm_vcpu *vcpu;
union tod_clock clk;
unsigned long i;
- mutex_lock(&kvm->lock);
preempt_disable();
store_tod_clock_ext(&clk);
@@ -3989,7 +3987,22 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
kvm_s390_vcpu_unblock_all(kvm);
preempt_enable();
+}
+
+void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+{
+ mutex_lock(&kvm->lock);
+ __kvm_s390_set_tod_clock(kvm, gtod);
+ mutex_unlock(&kvm->lock);
+}
+
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+{
+ if (!mutex_trylock(&kvm->lock))
+ return 0;
+ __kvm_s390_set_tod_clock(kvm, gtod);
mutex_unlock(&kvm->lock);
+ return 1;
}
/**
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 4ba8fc30d87a..798955b62fa3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -358,8 +358,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod);
+void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 30b24c42ef99..5beb7a4a11b3 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
- kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
+ /*
+ * To set the TOD clock the kvm lock must be taken, but the vcpu lock
+ * is already held in handle_set_clock. The usual lock order is the
+ * opposite. As SCK is deprecated and should not be used in several
+ * cases, for example when the multiple epoch facility or TOD clock
+ * steering facility is installed (see Principles of Operation), a
+ * slow path can be used. If the lock can not be taken via try_lock,
+ * the instruction will be retried via -EAGAIN at a later point in
+ * time.
+ */
+ if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) {
+ kvm_s390_retry_instr(vcpu);
+ return -EAGAIN;
+ }
kvm_s390_set_psw_cc(vcpu, 0);
return 0;
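The heart of the fix is the trylock wrapper: a caller that already
holds the inner (vcpu) lock must not block on the outer (kvm) lock, so
it tries once and, on failure, backs off and retries the whole
instruction. A userspace sketch of the same pattern with POSIX mutexes
(the names are stand-ins for the kvm structures):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long tod_clock;

static void __set_tod_clock(unsigned long long tod)
{
	tod_clock = tod;	/* caller must hold kvm_lock */
}

/* Blocking wrapper: safe for callers that hold no other lock. */
static void set_tod_clock(unsigned long long tod)
{
	pthread_mutex_lock(&kvm_lock);
	__set_tod_clock(tod);
	pthread_mutex_unlock(&kvm_lock);
}

/* Trylock wrapper: for callers that already hold an inner lock and
 * would invert the lock order by blocking. Returns 1 on success,
 * 0 if the caller should back off and retry the instruction. */
static int try_set_tod_clock(unsigned long long tod)
{
	if (pthread_mutex_trylock(&kvm_lock) != 0)
		return 0;
	__set_tod_clock(tod);
	pthread_mutex_unlock(&kvm_lock);
	return 1;
}

int main(void)
{
	if (!try_set_tod_clock(42))
		puts("busy -- retry later (the SCK handler returns -EAGAIN)");
	else
		printf("tod set to %llu\n", tod_clock);
	return 0;
}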
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c0573ba5c5a2244dc02060b1f374d4593c1d20b7 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Date: Tue, 1 Mar 2022 15:33:40 +0100
Subject: [PATCH] KVM: s390x: fix SCK locking
When handling the SCK instruction, the kvm lock is taken, even though
the vcpu lock is already being held. The normal locking order is kvm
lock first and then vcpu lock. This can (and in some circumstances
does) lead to deadlocks.
The function kvm_s390_set_tod_clock is called both by the SCK handler
and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu
lock, so they can safely take the kvm lock. The SCK handler holds the
vcpu lock, but will also somehow need to acquire the kvm lock without
relinquishing the vcpu lock.
The solution is to factor out the code to set the clock, and provide
two wrappers. One is called like the original function and does the
locking, the other is called kvm_s390_try_set_tod_clock and uses
trylock to try to acquire the kvm lock. This new wrapper is then used
in the SCK handler. If locking fails, -EAGAIN is returned, which is
eventually propagated to userspace, thus also freeing the vcpu lock and
allowing for forward progress.
This is not the most efficient or elegant way to solve this issue, but
the SCK instruction is deprecated and its performance is not critical.
The goal of this patch is just to provide a simple but correct way to
fix the bug.
Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction")
Signed-off-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b5ea95cf8686..b53ff693b66e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3961,14 +3961,12 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0;
}
-void kvm_s390_set_tod_clock(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod)
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
struct kvm_vcpu *vcpu;
union tod_clock clk;
unsigned long i;
- mutex_lock(&kvm->lock);
preempt_disable();
store_tod_clock_ext(&clk);
@@ -3989,7 +3987,22 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
kvm_s390_vcpu_unblock_all(kvm);
preempt_enable();
+}
+
+void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+{
+ mutex_lock(&kvm->lock);
+ __kvm_s390_set_tod_clock(kvm, gtod);
+ mutex_unlock(&kvm->lock);
+}
+
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+{
+ if (!mutex_trylock(&kvm->lock))
+ return 0;
+ __kvm_s390_set_tod_clock(kvm, gtod);
mutex_unlock(&kvm->lock);
+ return 1;
}
/**
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 4ba8fc30d87a..798955b62fa3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -358,8 +358,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod);
+void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 30b24c42ef99..5beb7a4a11b3 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
- kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
+ /*
+ * To set the TOD clock the kvm lock must be taken, but the vcpu lock
+ * is already held in handle_set_clock. The usual lock order is the
+ * opposite. As SCK is deprecated and should not be used in several
+ * cases, for example when the multiple epoch facility or TOD clock
+ * steering facility is installed (see Principles of Operation), a
+ * slow path can be used. If the lock can not be taken via try_lock,
+ * the instruction will be retried via -EAGAIN at a later point in
+ * time.
+ */
+ if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) {
+ kvm_s390_retry_instr(vcpu);
+ return -EAGAIN;
+ }
kvm_s390_set_psw_cc(vcpu, 0);
return 0;
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 79c9234ba596e903907de20573fd4bcc85315b06 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd(a)gmail.com>
Date: Thu, 3 Mar 2022 22:40:27 +0800
Subject: [PATCH] btrfs: don't access possibly stale fs_info data in
device_list_add
Syzbot reported a possible use-after-free in printing information
in device_list_add.
Very similar to the bug fixed by commit 0697d9a61099 ("btrfs: don't
access possibly stale fs_info data for printing duplicate device"),
but this time the use occurs in btrfs_info_in_rcu.
Call Trace:
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
btrfs_printk+0x395/0x425 fs/btrfs/super.c:244
device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957
btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387
btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:874 [inline]
__se_sys_ioctl fs/ioctl.c:860 [inline]
__x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fix this by passing NULL instead of device->fs_info here too.
Reported-and-tested-by: syzbot+82650a4e0ed38f218363(a)syzkaller.appspotmail.com
CC: stable(a)vger.kernel.org # 4.19+
Signed-off-by: Dongliang Mu <mudongliangabcd(a)gmail.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5e3e13d4940b..1be7cb2f955f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -921,16 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
* We are going to replace the device path for a given devid,
* make sure it's the same device if the device is mounted
+ *
+ * NOTE: the device->fs_info may not be reliable here so pass
+ * in a NULL to message helpers instead. This avoids a possible
+ * use-after-free when the fs_info and fs_info->sb are already
+ * torn down.
*/
if (device->bdev) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * device->fs_info may not be reliable here, so
- * pass in a NULL instead. This avoids a
- * possible use-after-free when the fs_info and
- * fs_info->sb are already torn down.
- */
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@@ -938,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(device->fs_info,
+ btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
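The generalizable rule: when an object's back-pointer may already be
torn down, message helpers should accept NULL and print a context-free
prefix instead of dereferencing stale state. A userspace sketch of such
a NULL-tolerant logger (the btrfs helpers behave analogously; these
names are illustrative):

#include <stdio.h>

struct fs_info {
	char fsid[16];
};

/* NULL-tolerant: callers that cannot prove fs_info is still alive
 * pass NULL and get a generic prefix instead of a use-after-free. */
static void info_printk(const struct fs_info *fs_info, const char *msg)
{
	if (fs_info)
		printf("BTRFS info (device %s): %s\n", fs_info->fsid, msg);
	else
		printf("BTRFS info: %s\n", msg);
}

int main(void)
{
	info_printk(NULL, "devid 1 device path changed");
	return 0;
}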
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 79c9234ba596e903907de20573fd4bcc85315b06 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd(a)gmail.com>
Date: Thu, 3 Mar 2022 22:40:27 +0800
Subject: [PATCH] btrfs: don't access possibly stale fs_info data in
device_list_add
Syzbot reported a possible use-after-free in printing information
in device_list_add.
Very similar to the bug fixed by commit 0697d9a61099 ("btrfs: don't
access possibly stale fs_info data for printing duplicate device"),
but this time the use occurs in btrfs_info_in_rcu.
Call Trace:
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
btrfs_printk+0x395/0x425 fs/btrfs/super.c:244
device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957
btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387
btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:874 [inline]
__se_sys_ioctl fs/ioctl.c:860 [inline]
__x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fix this by passing NULL instead of device->fs_info here too.
Reported-and-tested-by: syzbot+82650a4e0ed38f218363(a)syzkaller.appspotmail.com
CC: stable(a)vger.kernel.org # 4.19+
Signed-off-by: Dongliang Mu <mudongliangabcd(a)gmail.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5e3e13d4940b..1be7cb2f955f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -921,16 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
* We are going to replace the device path for a given devid,
* make sure it's the same device if the device is mounted
+ *
+ * NOTE: the device->fs_info may not be reliable here so pass
+ * in a NULL to message helpers instead. This avoids a possible
+ * use-after-free when the fs_info and fs_info->sb are already
+ * torn down.
*/
if (device->bdev) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * device->fs_info may not be reliable here, so
- * pass in a NULL instead. This avoids a
- * possible use-after-free when the fs_info and
- * fs_info->sb are already torn down.
- */
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@@ -938,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(device->fs_info,
+ btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 79c9234ba596e903907de20573fd4bcc85315b06 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd(a)gmail.com>
Date: Thu, 3 Mar 2022 22:40:27 +0800
Subject: [PATCH] btrfs: don't access possibly stale fs_info data in
device_list_add
Syzbot reported a possible use-after-free in printing information
in device_list_add.
Very similar to the bug fixed by commit 0697d9a61099 ("btrfs: don't
access possibly stale fs_info data for printing duplicate device"),
but this time the use occurs in btrfs_info_in_rcu.
Call Trace:
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
btrfs_printk+0x395/0x425 fs/btrfs/super.c:244
device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957
btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387
btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:874 [inline]
__se_sys_ioctl fs/ioctl.c:860 [inline]
__x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fix this by passing NULL instead of device->fs_info here too.
Reported-and-tested-by: syzbot+82650a4e0ed38f218363(a)syzkaller.appspotmail.com
CC: stable(a)vger.kernel.org # 4.19+
Signed-off-by: Dongliang Mu <mudongliangabcd(a)gmail.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5e3e13d4940b..1be7cb2f955f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -921,16 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
* We are going to replace the device path for a given devid,
* make sure it's the same device if the device is mounted
+ *
+ * NOTE: the device->fs_info may not be reliable here so pass
+ * in a NULL to message helpers instead. This avoids a possible
+ * use-after-free when the fs_info and fs_info->sb are already
+ * torn down.
*/
if (device->bdev) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * device->fs_info may not be reliable here, so
- * pass in a NULL instead. This avoids a
- * possible use-after-free when the fs_info and
- * fs_info->sb are already torn down.
- */
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@@ -938,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(device->fs_info,
+ btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 79c9234ba596e903907de20573fd4bcc85315b06 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd(a)gmail.com>
Date: Thu, 3 Mar 2022 22:40:27 +0800
Subject: [PATCH] btrfs: don't access possibly stale fs_info data in
device_list_add
Syzbot reported a possible use-after-free in printing information
in device_list_add.
Very similar to the bug fixed by commit 0697d9a61099 ("btrfs: don't
access possibly stale fs_info data for printing duplicate device"),
but this time the use occurs in btrfs_info_in_rcu.
Call Trace:
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
btrfs_printk+0x395/0x425 fs/btrfs/super.c:244
device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957
btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387
btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:874 [inline]
__se_sys_ioctl fs/ioctl.c:860 [inline]
__x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fix this by passing NULL instead of device->fs_info here too.
Reported-and-tested-by: syzbot+82650a4e0ed38f218363(a)syzkaller.appspotmail.com
CC: stable(a)vger.kernel.org # 4.19+
Signed-off-by: Dongliang Mu <mudongliangabcd(a)gmail.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5e3e13d4940b..1be7cb2f955f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -921,16 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
* We are going to replace the device path for a given devid,
* make sure it's the same device if the device is mounted
+ *
+ * NOTE: the device->fs_info may not be reliable here so pass
+ * in a NULL to message helpers instead. This avoids a possible
+ * use-after-free when the fs_info and fs_info->sb are already
+ * torn down.
*/
if (device->bdev) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * device->fs_info may not be reliable here, so
- * pass in a NULL instead. This avoids a
- * possible use-after-free when the fs_info and
- * fs_info->sb are already torn down.
- */
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@@ -938,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(device->fs_info,
+ btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 79c9234ba596e903907de20573fd4bcc85315b06 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd(a)gmail.com>
Date: Thu, 3 Mar 2022 22:40:27 +0800
Subject: [PATCH] btrfs: don't access possibly stale fs_info data in
device_list_add
Syzbot reported a possible use-after-free in printing information
in device_list_add.
Very similar to the bug fixed by commit 0697d9a61099 ("btrfs: don't
access possibly stale fs_info data for printing duplicate device"),
but this time the use occurs in btrfs_info_in_rcu.
Call Trace:
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
btrfs_printk+0x395/0x425 fs/btrfs/super.c:244
device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957
btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387
btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:874 [inline]
__se_sys_ioctl fs/ioctl.c:860 [inline]
__x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fix this by passing NULL instead of device->fs_info here too.
Reported-and-tested-by: syzbot+82650a4e0ed38f218363(a)syzkaller.appspotmail.com
CC: stable(a)vger.kernel.org # 4.19+
Signed-off-by: Dongliang Mu <mudongliangabcd(a)gmail.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5e3e13d4940b..1be7cb2f955f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -921,16 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
* We are going to replace the device path for a given devid,
* make sure it's the same device if the device is mounted
+ *
+ * NOTE: the device->fs_info may not be reliable here so pass
+ * in a NULL to message helpers instead. This avoids a possible
+ * use-after-free when the fs_info and fs_info->sb are already
+ * torn down.
*/
if (device->bdev) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * device->fs_info may not be reliable here, so
- * pass in a NULL instead. This avoids a
- * possible use-after-free when the fs_info and
- * fs_info->sb are already torn down.
- */
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@@ -938,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(device->fs_info,
+ btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 79c9234ba596e903907de20573fd4bcc85315b06 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd(a)gmail.com>
Date: Thu, 3 Mar 2022 22:40:27 +0800
Subject: [PATCH] btrfs: don't access possibly stale fs_info data in
device_list_add
Syzbot reported a possible use-after-free in printing information
in device_list_add.
Very similar to the bug fixed by commit 0697d9a61099 ("btrfs: don't
access possibly stale fs_info data for printing duplicate device"),
but this time the use occurs in btrfs_info_in_rcu.
Call Trace:
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
btrfs_printk+0x395/0x425 fs/btrfs/super.c:244
device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957
btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387
btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:874 [inline]
__se_sys_ioctl fs/ioctl.c:860 [inline]
__x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fix this by passing NULL instead of device->fs_info here too.
Reported-and-tested-by: syzbot+82650a4e0ed38f218363(a)syzkaller.appspotmail.com
CC: stable(a)vger.kernel.org # 4.19+
Signed-off-by: Dongliang Mu <mudongliangabcd(a)gmail.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5e3e13d4940b..1be7cb2f955f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -921,16 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
* We are going to replace the device path for a given devid,
* make sure it's the same device if the device is mounted
+ *
+ * NOTE: the device->fs_info may not be reliable here so pass
+ * in a NULL to message helpers instead. This avoids a possible
+ * use-after-free when the fs_info and fs_info->sb are already
+ * torn down.
*/
if (device->bdev) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * device->fs_info may not be reliable here, so
- * pass in a NULL instead. This avoids a
- * possible use-after-free when the fs_info and
- * fs_info->sb are already torn down.
- */
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@@ -938,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(device->fs_info,
+ btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 24 Jan 2022 18:29:52 +0100
Subject: [PATCH] media: omap3isp: Use struct_group() for memcpy() region
In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields. Wrap the target region
in struct_group(). This additionally fixes a theoretical misalignment
of the copy (since the size of "buf" changes between 64-bit and 32-bit,
but this is likely never built for 64-bit).
FWIW, I think this code is totally broken on 64-bit (which appears to
not be a "real" build configuration): it would either always fail (with
an uninitialized data->buf_size) or would cause corruption in userspace
due to the copy_to_user() in the call path against an uninitialized
data->buf value:
omap3isp_stat_request_statistics_time32(...)
struct omap3isp_stat_data data64;
...
omap3isp_stat_request_statistics(stat, &data64);
int omap3isp_stat_request_statistics(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
buf = isp_stat_buf_get(stat, data);
static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
if (buf->buf_size > data->buf_size) {
...
return ERR_PTR(-EINVAL);
}
...
rval = copy_to_user(data->buf,
buf->virt_addr,
buf->buf_size);
Regardless, additionally initialize data64 to be zero-filled to avoid
undefined behavior.
Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor
Cc: Arnd Bergmann <arnd(a)arndb.de>
Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data")
Cc: stable(a)vger.kernel.org
Reviewed-by: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5b9b57f4d9bf..68cf68dbcace 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat,
int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
struct omap3isp_stat_data_time32 *data)
{
- struct omap3isp_stat_data data64;
+ struct omap3isp_stat_data data64 = { };
int ret;
ret = omap3isp_stat_request_statistics(stat, &data64);
@@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
data->ts.tv_sec = data64.ts.tv_sec;
data->ts.tv_usec = data64.ts.tv_usec;
- memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts));
+ data->buf = (uintptr_t)data64.buf;
+ memcpy(&data->frame, &data64.frame, sizeof(data->frame));
return 0;
}
diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ff..d9db7ad43890 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
* struct omap3isp_stat_data - Statistic data sent to or received from user
* @ts: Timestamp of returned framestats.
* @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
* @frame_number: Frame number of requested stats.
* @cur_frame: Current frame number being processed.
* @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
struct timeval ts;
#endif
void __user *buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
__s32 tv_usec;
} ts;
__u32 buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#endif
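struct_group() wraps the members in an anonymous union so they stay
addressable both individually and as one named region, letting memcpy()
take an exact destination and size instead of the fragile sizeof
arithmetic. A standalone C11 sketch with the macro expanded by hand
(field layout mimics, but is not, the omap3isp UAPI):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hand-expanded equivalent of the kernel's struct_group(): the same
 * members are visible both directly and as a named sub-struct, so
 * memcpy() can target exactly this region. */
struct stat_data {
	uint64_t ts;		/* not part of the copied region */
	void *buf;		/* pointer width differs 32/64-bit */
	union {
		struct {
			uint32_t buf_size;
			uint16_t frame_number;
			uint16_t cur_frame;
			uint16_t config_counter;
		};
		struct {
			uint32_t buf_size;
			uint16_t frame_number;
			uint16_t cur_frame;
			uint16_t config_counter;
		} frame;
	};
};

int main(void)
{
	struct stat_data a = { .frame = { 64, 1, 2, 3 } }, b = { 0 };

	/* Bounded copy: sizeof(b.frame) covers only the grouped fields,
	 * unlike the old sizeof(*data) - sizeof(data->ts) arithmetic,
	 * which silently included the differently-sized buf pointer. */
	memcpy(&b.frame, &a.frame, sizeof(b.frame));
	printf("buf_size=%u frame=%u\n", b.frame.buf_size,
	       b.frame.frame_number);
	return 0;
}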
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 24 Jan 2022 18:29:52 +0100
Subject: [PATCH] media: omap3isp: Use struct_group() for memcpy() region
In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields. Wrap the target region
in struct_group(). This additionally fixes a theoretical misalignment
of the copy (since the size of "buf" changes between 64-bit and 32-bit,
but this is likely never built for 64-bit).
FWIW, I think this code is totally broken on 64-bit (which does not
appear to be a "real" build configuration): it would either always fail (with
an uninitialized data->buf_size) or would cause corruption in userspace
due to the copy_to_user() in the call path against an uninitialized
data->buf value:
omap3isp_stat_request_statistics_time32(...)
struct omap3isp_stat_data data64;
...
omap3isp_stat_request_statistics(stat, &data64);
int omap3isp_stat_request_statistics(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
buf = isp_stat_buf_get(stat, data);
static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
if (buf->buf_size > data->buf_size) {
...
return ERR_PTR(-EINVAL);
}
...
rval = copy_to_user(data->buf,
buf->virt_addr,
buf->buf_size);
Regardless, additionally initialize data64 to be zero-filled to avoid
undefined behavior.
Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor
Cc: Arnd Bergmann <arnd(a)arndb.de>
Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data")
Cc: stable(a)vger.kernel.org
Reviewed-by: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5b9b57f4d9bf..68cf68dbcace 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat,
int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
struct omap3isp_stat_data_time32 *data)
{
- struct omap3isp_stat_data data64;
+ struct omap3isp_stat_data data64 = { };
int ret;
ret = omap3isp_stat_request_statistics(stat, &data64);
@@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
data->ts.tv_sec = data64.ts.tv_sec;
data->ts.tv_usec = data64.ts.tv_usec;
- memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts));
+ data->buf = (uintptr_t)data64.buf;
+ memcpy(&data->frame, &data64.frame, sizeof(data->frame));
return 0;
}
diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ff..d9db7ad43890 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
* struct omap3isp_stat_data - Statistic data sent to or received from user
* @ts: Timestamp of returned framestats.
* @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
* @frame_number: Frame number of requested stats.
* @cur_frame: Current frame number being processed.
* @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
struct timeval ts;
#endif
void __user *buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
__s32 tv_usec;
} ts;
__u32 buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#endif
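Alongside the struct_group() change, the conversion helper above narrows
the user pointer explicitly instead of memcpy()ing raw bytes across it.
A hedged standalone sketch of that shape, with types simplified from the
omap3isp structs:

#include <stdint.h>

struct data64 {
	void *buf;		/* pointer-sized: 8 bytes on 64-bit */
	uint32_t buf_size;
	uint16_t frame_number;
};

struct data32 {
	uint32_t buf;		/* fixed 4 bytes in the compat layout */
	uint32_t buf_size;
	uint16_t frame_number;
};

static void to_compat(struct data32 *out, const struct data64 *in)
{
	/* Explicit, well-defined narrowing; a raw memcpy() would copy
	 * a size that silently depends on sizeof(void *). */
	out->buf = (uint32_t)(uintptr_t)in->buf;
	out->buf_size = in->buf_size;	/* copy fields, not byte ranges */
	out->frame_number = in->frame_number;
}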
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9b30430ea356f237945e52f8a3a42158877bd5a9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:02 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - only allow with rsa
The pkcs1pad template can be instantiated with an arbitrary akcipher
algorithm, which doesn't make sense; it is specifically an RSA padding
scheme. Make it check that the underlying algorithm really is RSA.
Fixes: 3d5b1ecdea6f ("crypto: rsa - RSA padding algorithm")
Cc: <stable(a)vger.kernel.org> # v4.5+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8ac3e73e8ea6..1b3545781425 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -621,6 +621,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
+ if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
+ err = -EINVAL;
+ goto err_free_inst;
+ }
+
err = -ENAMETOOLONG;
hash_name = crypto_attr_alg_name(tb[2]);
if (IS_ERR(hash_name)) {
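The guard itself is just a canonical-name comparison on the spawned
algorithm before the template instance is set up. A hedged, non-kernel
sketch of the same check (the stub type and function name below are
illustrative):

#include <errno.h>
#include <string.h>

struct akcipher_alg_stub {
	const char *cra_name;	/* canonical algorithm name, e.g. "rsa" */
};

/* pkcs1pad is an RSA-specific padding scheme, so refuse to wrap any
 * inner akcipher whose canonical name is not exactly "rsa". */
static int check_inner_alg(const struct akcipher_alg_stub *rsa_alg)
{
	if (strcmp(rsa_alg->cra_name, "rsa") != 0)
		return -EINVAL;
	return 0;
}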
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 24 Jan 2022 18:29:52 +0100
Subject: [PATCH] media: omap3isp: Use struct_group() for memcpy() region
In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields. Wrap the target region
in struct_group(). This additionally fixes a theoretical misalignment
of the copy (since the size of "buf" changes between 64-bit and 32-bit,
but this is likely never built for 64-bit).
FWIW, I think this code is totally broken on 64-bit (which does not
appear to be a "real" build configuration): it would either always fail (with
an uninitialized data->buf_size) or would cause corruption in userspace
due to the copy_to_user() in the call path against an uninitialized
data->buf value:
omap3isp_stat_request_statistics_time32(...)
struct omap3isp_stat_data data64;
...
omap3isp_stat_request_statistics(stat, &data64);
int omap3isp_stat_request_statistics(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
buf = isp_stat_buf_get(stat, data);
static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
if (buf->buf_size > data->buf_size) {
...
return ERR_PTR(-EINVAL);
}
...
rval = copy_to_user(data->buf,
buf->virt_addr,
buf->buf_size);
Regardless, additionally initialize data64 to be zero-filled to avoid
undefined behavior.
Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor
Cc: Arnd Bergmann <arnd(a)arndb.de>
Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data")
Cc: stable(a)vger.kernel.org
Reviewed-by: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5b9b57f4d9bf..68cf68dbcace 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat,
int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
struct omap3isp_stat_data_time32 *data)
{
- struct omap3isp_stat_data data64;
+ struct omap3isp_stat_data data64 = { };
int ret;
ret = omap3isp_stat_request_statistics(stat, &data64);
@@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
data->ts.tv_sec = data64.ts.tv_sec;
data->ts.tv_usec = data64.ts.tv_usec;
- memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts));
+ data->buf = (uintptr_t)data64.buf;
+ memcpy(&data->frame, &data64.frame, sizeof(data->frame));
return 0;
}
diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ff..d9db7ad43890 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
* struct omap3isp_stat_data - Statistic data sent to or received from user
* @ts: Timestamp of returned framestats.
* @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
* @frame_number: Frame number of requested stats.
* @cur_frame: Current frame number being processed.
* @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
struct timeval ts;
#endif
void __user *buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
__s32 tv_usec;
} ts;
__u32 buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#endif
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 24 Jan 2022 18:29:52 +0100
Subject: [PATCH] media: omap3isp: Use struct_group() for memcpy() region
In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields. Wrap the target region
in struct_group(). This additionally fixes a theoretical misalignment
of the copy (since the size of "buf" changes between 64-bit and 32-bit,
but this is likely never built for 64-bit).
FWIW, I think this code is totally broken on 64-bit (which does not
appear to be a "real" build configuration): it would either always fail (with
an uninitialized data->buf_size) or would cause corruption in userspace
due to the copy_to_user() in the call path against an uninitialized
data->buf value:
omap3isp_stat_request_statistics_time32(...)
struct omap3isp_stat_data data64;
...
omap3isp_stat_request_statistics(stat, &data64);
int omap3isp_stat_request_statistics(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
buf = isp_stat_buf_get(stat, data);
static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat,
struct omap3isp_stat_data *data)
...
if (buf->buf_size > data->buf_size) {
...
return ERR_PTR(-EINVAL);
}
...
rval = copy_to_user(data->buf,
buf->virt_addr,
buf->buf_size);
Regardless, additionally initialize data64 to be zero-filled to avoid
undefined behavior.
Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor
Cc: Arnd Bergmann <arnd(a)arndb.de>
Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data")
Cc: stable(a)vger.kernel.org
Reviewed-by: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5b9b57f4d9bf..68cf68dbcace 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat,
int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
struct omap3isp_stat_data_time32 *data)
{
- struct omap3isp_stat_data data64;
+ struct omap3isp_stat_data data64 = { };
int ret;
ret = omap3isp_stat_request_statistics(stat, &data64);
@@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
data->ts.tv_sec = data64.ts.tv_sec;
data->ts.tv_usec = data64.ts.tv_usec;
- memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts));
+ data->buf = (uintptr_t)data64.buf;
+ memcpy(&data->frame, &data64.frame, sizeof(data->frame));
return 0;
}
diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ff..d9db7ad43890 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
* struct omap3isp_stat_data - Statistic data sent to or received from user
* @ts: Timestamp of returned framestats.
* @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
* @frame_number: Frame number of requested stats.
* @cur_frame: Current frame number being processed.
* @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
struct timeval ts;
#endif
void __user *buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
__s32 tv_usec;
} ts;
__u32 buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#endif
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9b30430ea356f237945e52f8a3a42158877bd5a9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:02 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - only allow with rsa
The pkcs1pad template can be instantiated with an arbitrary akcipher
algorithm, which doesn't make sense; it is specifically an RSA padding
scheme. Make it check that the underlying algorithm really is RSA.
Fixes: 3d5b1ecdea6f ("crypto: rsa - RSA padding algorithm")
Cc: <stable(a)vger.kernel.org> # v4.5+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8ac3e73e8ea6..1b3545781425 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -621,6 +621,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
+ if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
+ err = -EINVAL;
+ goto err_free_inst;
+ }
+
err = -ENAMETOOLONG;
hash_name = crypto_attr_alg_name(tb[2]);
if (IS_ERR(hash_name)) {
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9b30430ea356f237945e52f8a3a42158877bd5a9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:02 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - only allow with rsa
The pkcs1pad template can be instantiated with an arbitrary akcipher
algorithm, which doesn't make sense; it is specifically an RSA padding
scheme. Make it check that the underlying algorithm really is RSA.
Fixes: 3d5b1ecdea6f ("crypto: rsa - RSA padding algorithm")
Cc: <stable(a)vger.kernel.org> # v4.5+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8ac3e73e8ea6..1b3545781425 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -621,6 +621,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
+ if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
+ err = -EINVAL;
+ goto err_free_inst;
+ }
+
err = -ENAMETOOLONG;
hash_name = crypto_attr_alg_name(tb[2]);
if (IS_ERR(hash_name)) {
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9b30430ea356f237945e52f8a3a42158877bd5a9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:02 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - only allow with rsa
The pkcs1pad template can be instantiated with an arbitrary akcipher
algorithm, which doesn't make sense; it is specifically an RSA padding
scheme. Make it check that the underlying algorithm really is RSA.
Fixes: 3d5b1ecdea6f ("crypto: rsa - RSA padding algorithm")
Cc: <stable(a)vger.kernel.org> # v4.5+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8ac3e73e8ea6..1b3545781425 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -621,6 +621,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
+ if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
+ err = -EINVAL;
+ goto err_free_inst;
+ }
+
err = -ENAMETOOLONG;
hash_name = crypto_attr_alg_name(tb[2]);
if (IS_ERR(hash_name)) {
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a24611ea356c7f3f0ec926da11b9482ac1f414fd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:05 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - fix buffer overread in
pkcs1pad_verify_complete()
Before checking whether the expected digest_info is present, we need to
check that there are enough bytes remaining.
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 6b556ddeb3a0..9d804831c8b3 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -476,6 +476,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
pos++;
if (digest_info) {
+ if (digest_info->size > dst_len - pos)
+ goto done;
if (crypto_memneq(out_buf + pos, digest_info->data,
digest_info->size))
goto done;
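The bug class here is comparing before checking how many bytes remain.
A hedged userspace sketch of the corrected order (the kernel uses
crypto_memneq() for a constant-time comparison; plain memcmp() below is
only for illustration):

#include <stddef.h>
#include <string.h>

/* Compare the expected digest_info prefix only when enough bytes are
 * left after `pos`, mirroring the digest_info->size > dst_len - pos
 * guard added by the patch. Caller must ensure pos <= len. */
static int prefix_present(const unsigned char *out_buf, size_t len,
			  size_t pos, const unsigned char *want,
			  size_t want_len)
{
	if (want_len > len - pos)
		return 0;	/* too few bytes remain: cannot match */
	return memcmp(out_buf + pos, want, want_len) == 0;
}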
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a24611ea356c7f3f0ec926da11b9482ac1f414fd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:05 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - fix buffer overread in
pkcs1pad_verify_complete()
Before checking whether the expected digest_info is present, we need to
check that there are enough bytes remaining.
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 6b556ddeb3a0..9d804831c8b3 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -476,6 +476,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
pos++;
if (digest_info) {
+ if (digest_info->size > dst_len - pos)
+ goto done;
if (crypto_memneq(out_buf + pos, digest_info->data,
digest_info->size))
goto done;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a24611ea356c7f3f0ec926da11b9482ac1f414fd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:05 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - fix buffer overread in
pkcs1pad_verify_complete()
Before checking whether the expected digest_info is present, we need to
check that there are enough bytes remaining.
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 6b556ddeb3a0..9d804831c8b3 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -476,6 +476,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
pos++;
if (digest_info) {
+ if (digest_info->size > dst_len - pos)
+ goto done;
if (crypto_memneq(out_buf + pos, digest_info->data,
digest_info->size))
goto done;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d3481accd974541e6a5d6a1fb588924a3519c36e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:04 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - restore signature length check
RSA PKCS#1 v1.5 signatures are required to be the same length as the RSA
key size. RFC8017 specifically requires the verifier to check this
(https://datatracker.ietf.org/doc/html/rfc8017#section-8.2.2).
Commit a49de377e051 ("crypto: Add hash param to pkcs1pad") changed the
kernel to allow longer signatures, but didn't explain this part of the
change; it seems to be unrelated to the rest of the commit.
Revert this change, since it doesn't appear to be correct.
We can be pretty sure that no one is relying on overly-long signatures
(which would have to be front-padded with zeroes) being supported, given
that they would have been broken since commit c7381b012872
("crypto: akcipher - new verify API for public key algorithms").
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Suggested-by: Vitaly Chikunov <vt(a)altlinux.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 7b223adebabf..6b556ddeb3a0 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -538,7 +538,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
if (WARN_ON(req->dst) ||
WARN_ON(!req->dst_len) ||
- !ctx->key_size || req->src_len < ctx->key_size)
+ !ctx->key_size || req->src_len != ctx->key_size)
return -EINVAL;
req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
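The restored rule from RFC 8017 section 8.2.2 is an exact-length
comparison, not a lower bound. A minimal sketch of the check in
isolation (names are illustrative):

#include <errno.h>
#include <stddef.h>

/* An RSASSA-PKCS1-v1_5 signature must be exactly as long as the RSA
 * modulus (ctx->key_size in the driver); shorter and longer inputs
 * are both rejected up front. */
static int check_sig_len(size_t src_len, size_t key_size)
{
	if (!key_size || src_len != key_size)
		return -EINVAL;
	return 0;
}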
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d3481accd974541e6a5d6a1fb588924a3519c36e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:04 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - restore signature length check
RSA PKCS#1 v1.5 signatures are required to be the same length as the RSA
key size. RFC8017 specifically requires the verifier to check this
(https://datatracker.ietf.org/doc/html/rfc8017#section-8.2.2).
Commit a49de377e051 ("crypto: Add hash param to pkcs1pad") changed the
kernel to allow longer signatures, but didn't explain this part of the
change; it seems to be unrelated to the rest of the commit.
Revert this change, since it doesn't appear to be correct.
We can be pretty sure that no one is relying on overly-long signatures
(which would have to be front-padded with zeroes) being supported, given
that they would have been broken since commit c7381b012872
("crypto: akcipher - new verify API for public key algorithms").
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Suggested-by: Vitaly Chikunov <vt(a)altlinux.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 7b223adebabf..6b556ddeb3a0 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -538,7 +538,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
if (WARN_ON(req->dst) ||
WARN_ON(!req->dst_len) ||
- !ctx->key_size || req->src_len < ctx->key_size)
+ !ctx->key_size || req->src_len != ctx->key_size)
return -EINVAL;
req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d3481accd974541e6a5d6a1fb588924a3519c36e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Tue, 18 Jan 2022 16:13:04 -0800
Subject: [PATCH] crypto: rsa-pkcs1pad - restore signature length check
RSA PKCS#1 v1.5 signatures are required to be the same length as the RSA
key size. RFC8017 specifically requires the verifier to check this
(https://datatracker.ietf.org/doc/html/rfc8017#section-8.2.2).
Commit a49de377e051 ("crypto: Add hash param to pkcs1pad") changed the
kernel to allow longer signatures, but didn't explain this part of the
change; it seems to be unrelated to the rest of the commit.
Revert this change, since it doesn't appear to be correct.
We can be pretty sure that no one is relying on overly-long signatures
(which would have to be front-padded with zeroes) being supported, given
that they would have been broken since commit c7381b012872
("crypto: akcipher - new verify API for public key algorithms").
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Suggested-by: Vitaly Chikunov <vt(a)altlinux.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 7b223adebabf..6b556ddeb3a0 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -538,7 +538,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
if (WARN_ON(req->dst) ||
WARN_ON(!req->dst_len) ||
- !ctx->key_size || req->src_len < ctx->key_size)
+ !ctx->key_size || req->src_len != ctx->key_size)
return -EINVAL;
req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8126b1c73108bc691f5643df19071a59a69d0bc6 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Mon, 14 Mar 2022 19:59:53 +0100
Subject: [PATCH] pstore: Don't use semaphores in always-atomic-context code
pstore_dump() is *always* invoked in atomic context (nowadays in an RCU
read-side critical section, before that under a spinlock).
It doesn't make sense to try to use semaphores here.
This is mostly a revert of commit ea84b580b955 ("pstore: Convert buf_lock
to semaphore"), except that two parts aren't restored back exactly as they
were:
- keep the lock initialization in pstore_register
- in efi_pstore_write(), always set the "block" flag to false
- omit "is_locked", that was unnecessary since
commit 959217c84c27 ("pstore: Actually give up during locking failure")
- fix the bailout message
The actual problem that the buggy commit was trying to address may have
been that the use of preemptible() in efi_pstore_write() was wrong - it
only looks at preempt_count() and the state of IRQs, but __rcu_read_lock()
doesn't touch either of those under CONFIG_PREEMPT_RCU.
(Sidenote: CONFIG_PREEMPT_RCU means that the scheduler can preempt tasks in
RCU read-side critical sections, but you're not allowed to actively
block/reschedule.)
Lockdep probably never caught the problem because it's very rare that you
actually hit the contended case, so lockdep always just sees the
down_trylock(), not the down_interruptible(), and so it can't tell that
there's a problem.
Fixes: ea84b580b955 ("pstore: Convert buf_lock to semaphore")
Cc: stable(a)vger.kernel.org
Acked-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Link: https://lore.kernel.org/r/20220314185953.2068993-1-jannh@google.com
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 0ef086e43090..7e771c56c13c 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record)
efi_name[i] = name[i];
ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
- preemptible(), record->size, record->psi->buf);
+ false, record->size, record->psi->buf);
if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
if (!schedule_work(&efivar_work))
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f243cb5e6a4f..e26162f102ff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -143,21 +143,22 @@ static void pstore_timer_kick(void)
mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
}
-/*
- * Should pstore_dump() wait for a concurrent pstore_dump()? If
- * not, the current pstore_dump() will report a failure to dump
- * and return.
- */
-static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
+static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
{
- /* In NMI path, pstore shouldn't block regardless of reason. */
+ /*
+ * In case of NMI path, pstore shouldn't be blocked
+ * regardless of reason.
+ */
if (in_nmi())
return true;
switch (reason) {
/* In panic case, other cpus are stopped by smp_send_stop(). */
case KMSG_DUMP_PANIC:
- /* Emergency restart shouldn't be blocked. */
+ /*
+ * Emergency restart shouldn't be blocked by spinning on
+ * pstore_info::buf_lock.
+ */
case KMSG_DUMP_EMERG:
return true;
default:
@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper,
unsigned long total = 0;
const char *why;
unsigned int part = 1;
+ unsigned long flags = 0;
int ret;
why = kmsg_dump_reason_str(reason);
- if (down_trylock(&psinfo->buf_lock)) {
- /* Failed to acquire lock: give up if we cannot wait. */
- if (pstore_cannot_wait(reason)) {
- pr_err("dump skipped in %s path: may corrupt error record\n",
- in_nmi() ? "NMI" : why);
- return;
- }
- if (down_interruptible(&psinfo->buf_lock)) {
- pr_err("could not grab semaphore?!\n");
+ if (pstore_cannot_block_path(reason)) {
+ if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) {
+ pr_err("dump skipped in %s path because of concurrent dump\n",
+ in_nmi() ? "NMI" : why);
return;
}
+ } else {
+ spin_lock_irqsave(&psinfo->buf_lock, flags);
}
kmsg_dump_rewind(&iter);
@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
total += record.size;
part++;
}
-
- up(&psinfo->buf_lock);
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
static struct kmsg_dumper pstore_dumper = {
@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi)
psi->write_user = pstore_write_user_compat;
psinfo = psi;
mutex_init(&psinfo->read_mutex);
- sema_init(&psinfo->buf_lock, 1);
+ spin_lock_init(&psinfo->buf_lock);
if (psi->flags & PSTORE_FLAGS_DMESG)
allocate_buf_for_compression();
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index eb93a54cff31..e97a8188f0fd 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -14,7 +14,7 @@
#include <linux/errno.h>
#include <linux/kmsg_dump.h>
#include <linux/mutex.h>
-#include <linux/semaphore.h>
+#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/types.h>
@@ -87,7 +87,7 @@ struct pstore_record {
* @owner: module which is responsible for this backend driver
* @name: name of the backend driver
*
- * @buf_lock: semaphore to serialize access to @buf
+ * @buf_lock: spinlock to serialize access to @buf
* @buf: preallocated crash dump buffer
* @bufsize: size of @buf available for crash dump bytes (must match
* smallest number of bytes available for writing to a
@@ -178,7 +178,7 @@ struct pstore_info {
struct module *owner;
const char *name;
- struct semaphore buf_lock;
+ spinlock_t buf_lock;
char *buf;
size_t bufsize;
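The rule the revert restores: a context that may not sleep can only
ever trylock and must bail out on contention, while a sleepable context
may wait. A hedged userspace analogue with POSIX spinlocks (the kernel
code uses spin_trylock_irqsave()/spin_lock_irqsave()); the function
names below are illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_spinlock_t buf_lock;

static bool dump_record(bool cannot_block)
{
	if (cannot_block) {
		/* NMI/panic/emergency analogue: never wait, just bail. */
		if (pthread_spin_trylock(&buf_lock) != 0) {
			fprintf(stderr, "dump skipped: concurrent dump\n");
			return false;
		}
	} else {
		pthread_spin_lock(&buf_lock);	/* spinning is allowed here */
	}
	/* ... emit the crash record into the shared buffer ... */
	pthread_spin_unlock(&buf_lock);
	return true;
}

int main(void)
{
	pthread_spin_init(&buf_lock, PTHREAD_PROCESS_PRIVATE);
	return dump_record(true) ? 0 : 1;
}

Unlike the semaphore version, nothing on this path can schedule, which
is what an always-atomic caller such as pstore_dump() requires.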
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8126b1c73108bc691f5643df19071a59a69d0bc6 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Mon, 14 Mar 2022 19:59:53 +0100
Subject: [PATCH] pstore: Don't use semaphores in always-atomic-context code
pstore_dump() is *always* invoked in atomic context (nowadays in an RCU
read-side critical section, before that under a spinlock).
It doesn't make sense to try to use semaphores here.
This is mostly a revert of commit ea84b580b955 ("pstore: Convert buf_lock
to semaphore"), except that two parts aren't restored back exactly as they
were:
- keep the lock initialization in pstore_register
- in efi_pstore_write(), always set the "block" flag to false
- omit "is_locked", that was unnecessary since
commit 959217c84c27 ("pstore: Actually give up during locking failure")
- fix the bailout message
The actual problem that the buggy commit was trying to address may have
been that the use of preemptible() in efi_pstore_write() was wrong - it
only looks at preempt_count() and the state of IRQs, but __rcu_read_lock()
doesn't touch either of those under CONFIG_PREEMPT_RCU.
(Sidenote: CONFIG_PREEMPT_RCU means that the scheduler can preempt tasks in
RCU read-side critical sections, but you're not allowed to actively
block/reschedule.)
Lockdep probably never caught the problem because it's very rare that you
actually hit the contended case, so lockdep always just sees the
down_trylock(), not the down_interruptible(), and so it can't tell that
there's a problem.
Fixes: ea84b580b955 ("pstore: Convert buf_lock to semaphore")
Cc: stable(a)vger.kernel.org
Acked-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Link: https://lore.kernel.org/r/20220314185953.2068993-1-jannh@google.com
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 0ef086e43090..7e771c56c13c 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record)
efi_name[i] = name[i];
ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
- preemptible(), record->size, record->psi->buf);
+ false, record->size, record->psi->buf);
if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
if (!schedule_work(&efivar_work))
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f243cb5e6a4f..e26162f102ff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -143,21 +143,22 @@ static void pstore_timer_kick(void)
mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
}
-/*
- * Should pstore_dump() wait for a concurrent pstore_dump()? If
- * not, the current pstore_dump() will report a failure to dump
- * and return.
- */
-static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
+static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
{
- /* In NMI path, pstore shouldn't block regardless of reason. */
+ /*
+ * In case of NMI path, pstore shouldn't be blocked
+ * regardless of reason.
+ */
if (in_nmi())
return true;
switch (reason) {
/* In panic case, other cpus are stopped by smp_send_stop(). */
case KMSG_DUMP_PANIC:
- /* Emergency restart shouldn't be blocked. */
+ /*
+ * Emergency restart shouldn't be blocked by spinning on
+ * pstore_info::buf_lock.
+ */
case KMSG_DUMP_EMERG:
return true;
default:
@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper,
unsigned long total = 0;
const char *why;
unsigned int part = 1;
+ unsigned long flags = 0;
int ret;
why = kmsg_dump_reason_str(reason);
- if (down_trylock(&psinfo->buf_lock)) {
- /* Failed to acquire lock: give up if we cannot wait. */
- if (pstore_cannot_wait(reason)) {
- pr_err("dump skipped in %s path: may corrupt error record\n",
- in_nmi() ? "NMI" : why);
- return;
- }
- if (down_interruptible(&psinfo->buf_lock)) {
- pr_err("could not grab semaphore?!\n");
+ if (pstore_cannot_block_path(reason)) {
+ if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) {
+ pr_err("dump skipped in %s path because of concurrent dump\n",
+ in_nmi() ? "NMI" : why);
return;
}
+ } else {
+ spin_lock_irqsave(&psinfo->buf_lock, flags);
}
kmsg_dump_rewind(&iter);
@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
total += record.size;
part++;
}
-
- up(&psinfo->buf_lock);
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
static struct kmsg_dumper pstore_dumper = {
@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi)
psi->write_user = pstore_write_user_compat;
psinfo = psi;
mutex_init(&psinfo->read_mutex);
- sema_init(&psinfo->buf_lock, 1);
+ spin_lock_init(&psinfo->buf_lock);
if (psi->flags & PSTORE_FLAGS_DMESG)
allocate_buf_for_compression();
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index eb93a54cff31..e97a8188f0fd 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -14,7 +14,7 @@
#include <linux/errno.h>
#include <linux/kmsg_dump.h>
#include <linux/mutex.h>
-#include <linux/semaphore.h>
+#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/types.h>
@@ -87,7 +87,7 @@ struct pstore_record {
* @owner: module which is responsible for this backend driver
* @name: name of the backend driver
*
- * @buf_lock: semaphore to serialize access to @buf
+ * @buf_lock: spinlock to serialize access to @buf
* @buf: preallocated crash dump buffer
* @bufsize: size of @buf available for crash dump bytes (must match
* smallest number of bytes available for writing to a
@@ -178,7 +178,7 @@ struct pstore_info {
struct module *owner;
const char *name;
- struct semaphore buf_lock;
+ spinlock_t buf_lock;
char *buf;
size_t bufsize;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8126b1c73108bc691f5643df19071a59a69d0bc6 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Mon, 14 Mar 2022 19:59:53 +0100
Subject: [PATCH] pstore: Don't use semaphores in always-atomic-context code
pstore_dump() is *always* invoked in atomic context (nowadays in an RCU
read-side critical section, before that under a spinlock).
It doesn't make sense to try to use semaphores here.
This is mostly a revert of commit ea84b580b955 ("pstore: Convert buf_lock
to semaphore"), except that two parts aren't restored back exactly as they
were:
- keep the lock initialization in pstore_register
- in efi_pstore_write(), always set the "block" flag to false
- omit "is_locked", that was unnecessary since
commit 959217c84c27 ("pstore: Actually give up during locking failure")
- fix the bailout message
The actual problem that the buggy commit was trying to address may have
been that the use of preemptible() in efi_pstore_write() was wrong - it
only looks at preempt_count() and the state of IRQs, but __rcu_read_lock()
doesn't touch either of those under CONFIG_PREEMPT_RCU.
(Sidenote: CONFIG_PREEMPT_RCU means that the scheduler can preempt tasks in
RCU read-side critical sections, but you're not allowed to actively
block/reschedule.)
Lockdep probably never caught the problem because it's very rare that you
actually hit the contended case, so lockdep always just sees the
down_trylock(), not the down_interruptible(), and so it can't tell that
there's a problem.
Fixes: ea84b580b955 ("pstore: Convert buf_lock to semaphore")
Cc: stable(a)vger.kernel.org
Acked-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Link: https://lore.kernel.org/r/20220314185953.2068993-1-jannh@google.com
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 0ef086e43090..7e771c56c13c 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record)
efi_name[i] = name[i];
ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
- preemptible(), record->size, record->psi->buf);
+ false, record->size, record->psi->buf);
if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
if (!schedule_work(&efivar_work))
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f243cb5e6a4f..e26162f102ff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -143,21 +143,22 @@ static void pstore_timer_kick(void)
mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
}
-/*
- * Should pstore_dump() wait for a concurrent pstore_dump()? If
- * not, the current pstore_dump() will report a failure to dump
- * and return.
- */
-static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
+static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
{
- /* In NMI path, pstore shouldn't block regardless of reason. */
+ /*
+ * In case of NMI path, pstore shouldn't be blocked
+ * regardless of reason.
+ */
if (in_nmi())
return true;
switch (reason) {
/* In panic case, other cpus are stopped by smp_send_stop(). */
case KMSG_DUMP_PANIC:
- /* Emergency restart shouldn't be blocked. */
+ /*
+ * Emergency restart shouldn't be blocked by spinning on
+ * pstore_info::buf_lock.
+ */
case KMSG_DUMP_EMERG:
return true;
default:
@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper,
unsigned long total = 0;
const char *why;
unsigned int part = 1;
+ unsigned long flags = 0;
int ret;
why = kmsg_dump_reason_str(reason);
- if (down_trylock(&psinfo->buf_lock)) {
- /* Failed to acquire lock: give up if we cannot wait. */
- if (pstore_cannot_wait(reason)) {
- pr_err("dump skipped in %s path: may corrupt error record\n",
- in_nmi() ? "NMI" : why);
- return;
- }
- if (down_interruptible(&psinfo->buf_lock)) {
- pr_err("could not grab semaphore?!\n");
+ if (pstore_cannot_block_path(reason)) {
+ if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) {
+ pr_err("dump skipped in %s path because of concurrent dump\n",
+ in_nmi() ? "NMI" : why);
return;
}
+ } else {
+ spin_lock_irqsave(&psinfo->buf_lock, flags);
}
kmsg_dump_rewind(&iter);
@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
total += record.size;
part++;
}
-
- up(&psinfo->buf_lock);
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
static struct kmsg_dumper pstore_dumper = {
@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi)
psi->write_user = pstore_write_user_compat;
psinfo = psi;
mutex_init(&psinfo->read_mutex);
- sema_init(&psinfo->buf_lock, 1);
+ spin_lock_init(&psinfo->buf_lock);
if (psi->flags & PSTORE_FLAGS_DMESG)
allocate_buf_for_compression();
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index eb93a54cff31..e97a8188f0fd 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -14,7 +14,7 @@
#include <linux/errno.h>
#include <linux/kmsg_dump.h>
#include <linux/mutex.h>
-#include <linux/semaphore.h>
+#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/types.h>
@@ -87,7 +87,7 @@ struct pstore_record {
* @owner: module which is responsible for this backend driver
* @name: name of the backend driver
*
- * @buf_lock: semaphore to serialize access to @buf
+ * @buf_lock: spinlock to serialize access to @buf
* @buf: preallocated crash dump buffer
* @bufsize: size of @buf available for crash dump bytes (must match
* smallest number of bytes available for writing to a
@@ -178,7 +178,7 @@ struct pstore_info {
struct module *owner;
const char *name;
- struct semaphore buf_lock;
+ spinlock_t buf_lock;
char *buf;
size_t bufsize;
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7344bad7fb6daa4877a1c064b52c7d5f9182c41b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Wed, 23 Mar 2022 12:04:38 +0200
Subject: [PATCH] drm/edid: fix CEA extension byte #3 parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only an EDID CEA extension has byte #3, while the CTA DisplayID Data
Block does not. Don't interpret bogus data for color formats.
For most displays it's probably an unlikely scenario you'd have a CTA
DisplayID Data Block without a CEA extension, but they do exist.
Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID")
Cc: <stable(a)vger.kernel.org>
Cc: Shawn C Lee <shawn.c.lee(a)intel.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220323100438.1757295-1-jani…
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f07af6786cec..cc7bd58369df 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5188,10 +5188,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
/* The existence of a CEA block should imply RGB support */
info->color_formats = DRM_COLOR_FORMAT_RGB444;
- if (edid_ext[3] & EDID_CEA_YCRCB444)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
- if (edid_ext[3] & EDID_CEA_YCRCB422)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+
+ /* CTA DisplayID Data Block does not have byte #3 */
+ if (edid_ext[0] == CEA_EXT) {
+ if (edid_ext[3] & EDID_CEA_YCRCB444)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
+ if (edid_ext[3] & EDID_CEA_YCRCB422)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+ }
if (cea_db_offsets(edid_ext, &start, &end))
return;
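The fix hinges on checking the extension tag byte before touching byte
#3 at all. A hedged standalone sketch of the parse (constants follow
CTA-861; the names below are illustrative, not the drm_edid.c defines):

#include <stdbool.h>
#include <stdint.h>

#define CEA_EXT_TAG	0x02		/* CTA extension tag byte */
#define CEA_YCBCR444	(1 << 5)	/* byte #3, bit 5 */
#define CEA_YCBCR422	(1 << 4)	/* byte #3, bit 4 */

struct color_formats { bool ycbcr444, ycbcr422; };

/* Read the color-format flags only from a true CEA extension block;
 * a CTA DisplayID Data Block reaches the same parser but has no
 * byte #3 to read. */
static void parse_color_formats(const uint8_t edid_ext[128],
				struct color_formats *f)
{
	if (edid_ext[0] != CEA_EXT_TAG)
		return;
	f->ycbcr444 = edid_ext[3] & CEA_YCBCR444;
	f->ycbcr422 = edid_ext[3] & CEA_YCBCR422;
}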
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7344bad7fb6daa4877a1c064b52c7d5f9182c41b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Wed, 23 Mar 2022 12:04:38 +0200
Subject: [PATCH] drm/edid: fix CEA extension byte #3 parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only an EDID CEA extension has byte #3, while the CTA DisplayID Data
Block does not. Don't interpret bogus data for color formats.
For most displays it's probably an unlikely scenario you'd have a CTA
DisplayID Data Block without a CEA extension, but they do exist.
Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID")
Cc: <stable(a)vger.kernel.org>
Cc: Shawn C Lee <shawn.c.lee(a)intel.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220323100438.1757295-1-jani…
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f07af6786cec..cc7bd58369df 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5188,10 +5188,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
/* The existence of a CEA block should imply RGB support */
info->color_formats = DRM_COLOR_FORMAT_RGB444;
- if (edid_ext[3] & EDID_CEA_YCRCB444)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
- if (edid_ext[3] & EDID_CEA_YCRCB422)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+
+ /* CTA DisplayID Data Block does not have byte #3 */
+ if (edid_ext[0] == CEA_EXT) {
+ if (edid_ext[3] & EDID_CEA_YCRCB444)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
+ if (edid_ext[3] & EDID_CEA_YCRCB422)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+ }
if (cea_db_offsets(edid_ext, &start, &end))
return;
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7344bad7fb6daa4877a1c064b52c7d5f9182c41b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Wed, 23 Mar 2022 12:04:38 +0200
Subject: [PATCH] drm/edid: fix CEA extension byte #3 parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only an EDID CEA extension has byte #3, while the CTA DisplayID Data
Block does not. Don't interpret bogus data for color formats.
For most displays it's probably an unlikely scenario you'd have a CTA
DisplayID Data Block without a CEA extension, but they do exist.
Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID")
Cc: <stable(a)vger.kernel.org>
Cc: Shawn C Lee <shawn.c.lee(a)intel.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220323100438.1757295-1-jani…
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f07af6786cec..cc7bd58369df 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5188,10 +5188,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
/* The existence of a CEA block should imply RGB support */
info->color_formats = DRM_COLOR_FORMAT_RGB444;
- if (edid_ext[3] & EDID_CEA_YCRCB444)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
- if (edid_ext[3] & EDID_CEA_YCRCB422)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+
+ /* CTA DisplayID Data Block does not have byte #3 */
+ if (edid_ext[0] == CEA_EXT) {
+ if (edid_ext[3] & EDID_CEA_YCRCB444)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
+ if (edid_ext[3] & EDID_CEA_YCRCB422)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+ }
if (cea_db_offsets(edid_ext, &start, &end))
return;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7344bad7fb6daa4877a1c064b52c7d5f9182c41b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Wed, 23 Mar 2022 12:04:38 +0200
Subject: [PATCH] drm/edid: fix CEA extension byte #3 parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only an EDID CEA extension has byte #3, while the CTA DisplayID Data
Block does not. Don't interpret bogus data for color formats.
For most displays it's probably an unlikely scenario you'd have a CTA
DisplayID Data Block without a CEA extension, but they do exist.
Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID")
Cc: <stable(a)vger.kernel.org>
Cc: Shawn C Lee <shawn.c.lee(a)intel.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220323100438.1757295-1-jani…
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f07af6786cec..cc7bd58369df 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5188,10 +5188,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
/* The existence of a CEA block should imply RGB support */
info->color_formats = DRM_COLOR_FORMAT_RGB444;
- if (edid_ext[3] & EDID_CEA_YCRCB444)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
- if (edid_ext[3] & EDID_CEA_YCRCB422)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+
+ /* CTA DisplayID Data Block does not have byte #3 */
+ if (edid_ext[0] == CEA_EXT) {
+ if (edid_ext[3] & EDID_CEA_YCRCB444)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
+ if (edid_ext[3] & EDID_CEA_YCRCB422)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+ }
if (cea_db_offsets(edid_ext, &start, &end))
return;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7344bad7fb6daa4877a1c064b52c7d5f9182c41b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Wed, 23 Mar 2022 12:04:38 +0200
Subject: [PATCH] drm/edid: fix CEA extension byte #3 parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only an EDID CEA extension has byte #3, while the CTA DisplayID Data
Block does not. Don't interpret bogus data for color formats.
For most displays it's probably an unlikely scenario you'd have a CTA
DisplayID Data Block without a CEA extension, but they do exist.
Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID")
Cc: <stable(a)vger.kernel.org>
Cc: Shawn C Lee <shawn.c.lee(a)intel.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220323100438.1757295-1-jani…
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f07af6786cec..cc7bd58369df 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5188,10 +5188,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
/* The existence of a CEA block should imply RGB support */
info->color_formats = DRM_COLOR_FORMAT_RGB444;
- if (edid_ext[3] & EDID_CEA_YCRCB444)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
- if (edid_ext[3] & EDID_CEA_YCRCB422)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+
+ /* CTA DisplayID Data Block does not have byte #3 */
+ if (edid_ext[0] == CEA_EXT) {
+ if (edid_ext[3] & EDID_CEA_YCRCB444)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
+ if (edid_ext[3] & EDID_CEA_YCRCB422)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+ }
if (cea_db_offsets(edid_ext, &start, &end))
return;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7344bad7fb6daa4877a1c064b52c7d5f9182c41b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Wed, 23 Mar 2022 12:04:38 +0200
Subject: [PATCH] drm/edid: fix CEA extension byte #3 parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only an EDID CEA extension has byte #3, while the CTA DisplayID Data
Block does not. Don't interpret bogus data for color formats.
For most displays it's probably an unlikely scenario you'd have a CTA
DisplayID Data Block without a CEA extension, but they do exist.
Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID")
Cc: <stable(a)vger.kernel.org>
Cc: Shawn C Lee <shawn.c.lee(a)intel.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220323100438.1757295-1-jani…
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f07af6786cec..cc7bd58369df 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5188,10 +5188,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
/* The existence of a CEA block should imply RGB support */
info->color_formats = DRM_COLOR_FORMAT_RGB444;
- if (edid_ext[3] & EDID_CEA_YCRCB444)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
- if (edid_ext[3] & EDID_CEA_YCRCB422)
- info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+
+ /* CTA DisplayID Data Block does not have byte #3 */
+ if (edid_ext[0] == CEA_EXT) {
+ if (edid_ext[3] & EDID_CEA_YCRCB444)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR444;
+ if (edid_ext[3] & EDID_CEA_YCRCB422)
+ info->color_formats |= DRM_COLOR_FORMAT_YCBCR422;
+ }
if (cea_db_offsets(edid_ext, &start, &end))
return;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From aa1b46dcdc7baaf5fec0be25782ef24b26aa209e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj(a)kernel.org>
Date: Sun, 13 Mar 2022 21:15:02 -1000
Subject: [PATCH] block: fix rq-qos breakage from skipping rq_qos_done_bio()
a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't
tracked") made bio_endio() skip rq_qos_done_bio() if BIO_TRACKED is not set.
While this fixed a potential oops, it also broke blk-iocost by skipping the
done_bio callback for merged bios.
Before, whether a bio goes through rq_qos_throttle() or rq_qos_merge(),
rq_qos_done_bio() would be called on the bio on completion with BIO_TRACKED
distinguishing the former from the latter. After a647a524a467,
rq_qos_done_bio() is no longer called for bios which went through
rq_qos_merge(). This royally confuses
blk-iocost as the merged bios never finish and are considered perpetually
in-flight.
One reliably reproducible failure mode is an intermediate cgroup getting
stuck active preventing its children from being activated due to the
leaf-only rule, leading to loss of control. The following is from
resctl-bench protection scenario which emulates isolating a web server like
workload from a memory bomb run on an iocost configuration which should
yield a reasonable level of protection.
# cat /sys/block/nvme2n1/device/model
Samsung SSD 970 PRO 512GB
# cat /sys/fs/cgroup/io.cost.model
259:0 ctrl=user model=linear rbps=834913556 rseqiops=93622 rrandiops=102913 wbps=618985353 wseqiops=72325 wrandiops=71025
# cat /sys/fs/cgroup/io.cost.qos
259:0 enable=1 ctrl=user rpct=95.00 rlat=18776 wpct=95.00 wlat=8897 min=60.00 max=100.00
# resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1
...
Memory Hog Summary
==================
IO Latency: R p50=242u:336u/2.5m p90=794u:1.4m/7.5m p99=2.7m:8.0m/62.5m max=8.0m:36.4m/350m
W p50=221u:323u/1.5m p90=709u:1.2m/5.5m p99=1.5m:2.5m/9.5m max=6.9m:35.9m/350m
Isolation and Request Latency Impact Distributions:
min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev
isol% 15.90 15.90 15.90 40.05 57.24 59.07 60.01 74.63 74.63 90.35 90.35 58.12 15.82
lat-imp% 0 0 0 0 0 4.55 14.68 15.54 233.5 548.1 548.1 53.88 143.6
Result: isol=58.12:15.82% lat_imp=53.88%:143.6 work_csv=100.0% missing=3.96%
The isolation result of 58.12% is close to what this device would show
without any IO control.
Fix it by introducing a new flag BIO_QOS_MERGED to mark merged bios and
calling rq_qos_done_bio() on them too. For consistency and clarity, rename
BIO_TRACKED to BIO_QOS_THROTTLED. The flag checks are moved into
rq_qos_done_bio() so that it's next to the code paths that set the flags.
With the patch applied, the above same benchmark shows:
# resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1
...
Memory Hog Summary
==================
IO Latency: R p50=123u:84.4u/985u p90=322u:256u/2.5m p99=1.6m:1.4m/9.5m max=11.1m:36.0m/350m
W p50=429u:274u/995u p90=1.7m:1.3m/4.5m p99=3.4m:2.7m/11.5m max=7.9m:5.9m/26.5m
Isolation and Request Latency Impact Distributions:
min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev
isol% 84.91 84.91 89.51 90.73 92.31 94.49 96.36 98.04 98.71 100.0 100.0 94.42 2.81
lat-imp% 0 0 0 0 0 2.81 5.73 11.11 13.92 17.53 22.61 4.10 4.68
Result: isol=94.42:2.81% lat_imp=4.10%:4.68 work_csv=58.34% missing=0%
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Fixes: a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't tracked")
Cc: stable(a)vger.kernel.org # v5.15+
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Link: https://lore.kernel.org/r/Yi7rdrzQEHjJLGKB@slm.duckdns.org
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bio.c b/block/bio.c
index 151cace2dbe1..33979f306e9e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1516,8 +1516,7 @@ void bio_endio(struct bio *bio)
if (!bio_integrity_endio(bio))
return;
- if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
- rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio);
+ rq_qos_done_bio(bio);
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 010e658d44a8..2f33932e72e3 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -598,7 +598,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
int inflight = 0;
blkg = bio->bi_blkg;
- if (!blkg || !bio_flagged(bio, BIO_TRACKED))
+ if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
return;
iolat = blkg_to_lat(bio->bi_blkg);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 3cfbc8668cba..68267007da1c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
__rq_qos_requeue(q->rq_qos, rq);
}
-static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
+static inline void rq_qos_done_bio(struct bio *bio)
{
- if (q->rq_qos)
- __rq_qos_done_bio(q->rq_qos, bio);
+ if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
+ bio_flagged(bio, BIO_QOS_MERGED))) {
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ if (q->rq_qos)
+ __rq_qos_done_bio(q->rq_qos, bio);
+ }
}
static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
{
- /*
- * BIO_TRACKED lets controllers know that a bio went through the
- * normal rq_qos path.
- */
if (q->rq_qos) {
- bio_set_flag(bio, BIO_TRACKED);
+ bio_set_flag(bio, BIO_QOS_THROTTLED);
__rq_qos_throttle(q->rq_qos, bio);
}
}
@@ -205,8 +205,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (q->rq_qos)
+ if (q->rq_qos) {
+ bio_set_flag(bio, BIO_QOS_MERGED);
__rq_qos_merge(q->rq_qos, rq, bio);
+ }
}
static inline void rq_qos_queue_depth_changed(struct request_queue *q)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5561e58d158a..0c3563b45fe9 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -324,7 +324,8 @@ enum {
BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
* of this bio. */
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
- BIO_TRACKED, /* set if bio goes through the rq_qos path */
+ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
+ BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
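To see why moving the flag checks into rq_qos_done_bio() matters, here is a
toy userspace model of the flag lifecycle (plain C; toy_bio and these helpers
are illustrative stand-ins for the kernel structures, not kernel API):

#include <stdio.h>

enum { QOS_THROTTLED = 1 << 0, QOS_MERGED = 1 << 1 };

struct toy_bio { unsigned int flags; };

/* Submission: the throttle path marks the bio... */
static void qos_throttle(struct toy_bio *b) { b->flags |= QOS_THROTTLED; }

/* ...and, after the fix, so does the merge path. */
static void qos_merge(struct toy_bio *b) { b->flags |= QOS_MERGED; }

/*
 * Completion: both kinds of bio now reach the done callback. With the
 * old BIO_TRACKED-only check, a merged bio was skipped here, so an
 * iocost-style controller counted it as perpetually in flight.
 */
static void qos_done(struct toy_bio *b)
{
	if (b->flags & (QOS_THROTTLED | QOS_MERGED))
		printf("done_bio (flags=%#x)\n", b->flags);
}

int main(void)
{
	struct toy_bio merged = { 0 };

	qos_merge(&merged);
	qos_done(&merged);	/* fires now; did not with BIO_TRACKED alone */
	return 0;
}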
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 031495635b4668f94e964e037ca93d0d38bfde58 Mon Sep 17 00:00:00 2001
From: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Date: Wed, 2 Mar 2022 09:38:09 -0800
Subject: [PATCH] arm64: Do not defer reserve_crashkernel() for platforms with
no DMA memory zones
The following patches resulted in deferring crash kernel reservation to
mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU),
in particular Raspberry Pi 4.
commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32")
commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges")
commit 0a30c53573b0 ("arm64: mm: Move reserve_crashkernel() into mem_init()")
commit 2687275a5843 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required")
The above changes introduced a boot slowdown due to linear map creation for
all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1]. The proposed
changes restore crash kernel reservation to the earlier behavior, thus
avoiding the slow boot, particularly for platforms with IOMMU (no DMA memory
zones).
Tested the changes to confirm the ~150ms boot slowdown is gone on our SoC
with IOMMU and 8GB memory. Also tested with ZONE_DMA and/or ZONE_DMA32
configs to confirm no regression to the deferring scheme of crash kernel
memory reservation. In both cases the kernel crash dump was successfully
collected.
[1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.micr…
Signed-off-by: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.mi…
[will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build]
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771..919be440494f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
* otherwise it is empty.
+ *
+ * Memory reservation for crash kernel either done early or deferred
+ * depending on DMA memory zones configs (ZONE_DMA) --
+ *
+ * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
+ * here instead of max_zone_phys(). This lets early reservation of
+ * crash kernel memory which has a dependency on arm64_dma_phys_limit.
+ * Reserving memory early for crash kernel allows linear creation of block
+ * mappings (greater than page-granularity) for all the memory bank rangs.
+ * In this scheme a comparatively quicker boot is observed.
+ *
+ * If ZONE_DMA configs are defined, crash kernel memory reservation
+ * is delayed until DMA zone memory range size initilazation performed in
+ * zone_sizes_init(). The defer is necessary to steer clear of DMA zone
+ * memory range to avoid overlap allocation. So crash kernel memory boundaries
+ * are not known when mapping all bank memory ranges, which otherwise means
+ * not possible to exclude crash kernel range from creating block mappings
+ * so page-granularity mappings are created for the entire memory range.
+ * Hence a slightly slower boot is observed.
+ *
+ * Note: Page-granularity mapppings are necessary for crash kernel memory
+ * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
*/
-phys_addr_t arm64_dma_phys_limit __ro_after_init;
+#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
+phys_addr_t __ro_after_init arm64_dma_phys_limit;
+#else
+const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
+#endif
#ifdef CONFIG_KEXEC_CORE
/*
@@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
- if (!arm64_dma_phys_limit)
- arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
+
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
@@ -361,7 +388,8 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- reserve_crashkernel();
+ if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
memblock_dump_all();
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8..ed21bf83d0b7 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
+ if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -528,6 +528,17 @@ static void __init map_mem(pgd_t *pgdp)
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA) ||
+ IS_ENABLED(CONFIG_ZONE_DMA32))
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+ else if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+#endif
+
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -554,6 +565,25 @@ static void __init map_mem(pgd_t *pgdp)
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * in page granularity and put back unused memory to buddy system
+ * through /sys/kernel/kexec_crash_size interface.
+ */
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map &&
+ !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (crashk_res.end) {
+ __map_memblock(pgdp, crashk_res.start,
+ crashk_res.end + 1,
+ PAGE_KERNEL,
+ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+ }
+#endif
}
void mark_rodata_ro(void)
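The reservation policy the patch implements can be condensed into two small
helpers. This is a sketch only, assuming the config checks fold to constants
the way IS_ENABLED() does; TOY_PHYS_MASK is a placeholder for the real
PHYS_MASK, which depends on the configured physical address size:

#include <stdbool.h>
#include <stdint.h>

#define TOY_PHYS_MASK	((((uint64_t)1) << 48) - 1)	/* placeholder */

/*
 * No DMA zones: the DMA limit is a compile-time constant, so the crash
 * kernel can be reserved early from arm64_memblock_init() and the linear
 * map keeps block mappings (quicker boot).
 */
static bool reserve_crashkernel_early(bool zone_dma, bool zone_dma32)
{
	return !zone_dma && !zone_dma32;
}

/*
 * With a DMA zone the limit is only known after zone sizing, hence the
 * deferred reservation in bootmem_init().
 */
static uint64_t dma_phys_limit(bool any_dma_zone, uint64_t zone_limit)
{
	return any_dma_zone ? zone_limit : TOY_PHYS_MASK + 1;
}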
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 031495635b4668f94e964e037ca93d0d38bfde58 Mon Sep 17 00:00:00 2001
From: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Date: Wed, 2 Mar 2022 09:38:09 -0800
Subject: [PATCH] arm64: Do not defer reserve_crashkernel() for platforms with
no DMA memory zones
The following patches resulted in deferring crash kernel reservation to
mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU),
in particular Raspberry Pi 4.
commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32")
commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges")
commit 0a30c53573b0 ("arm64: mm: Move reserve_crashkernel() into mem_init()")
commit 2687275a5843 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required")
The above changes introduced a boot slowdown due to linear map creation for
all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1]. The proposed
changes restore crash kernel reservation to the earlier behavior, thus
avoiding the slow boot, particularly for platforms with IOMMU (no DMA memory
zones).
Tested the changes to confirm the ~150ms boot slowdown is gone on our SoC
with IOMMU and 8GB memory. Also tested with ZONE_DMA and/or ZONE_DMA32
configs to confirm no regression to the deferring scheme of crash kernel
memory reservation. In both cases the kernel crash dump was successfully
collected.
[1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.micr…
Signed-off-by: Vijay Balakrishna <vijayb(a)linux.microsoft.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.mi…
[will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build]
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771..919be440494f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
* otherwise it is empty.
+ *
+ * Memory reservation for crash kernel either done early or deferred
+ * depending on DMA memory zones configs (ZONE_DMA) --
+ *
+ * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
+ * here instead of max_zone_phys(). This lets early reservation of
+ * crash kernel memory which has a dependency on arm64_dma_phys_limit.
+ * Reserving memory early for crash kernel allows linear creation of block
+ * mappings (greater than page-granularity) for all the memory bank rangs.
+ * In this scheme a comparatively quicker boot is observed.
+ *
+ * If ZONE_DMA configs are defined, crash kernel memory reservation
+ * is delayed until DMA zone memory range size initilazation performed in
+ * zone_sizes_init(). The defer is necessary to steer clear of DMA zone
+ * memory range to avoid overlap allocation. So crash kernel memory boundaries
+ * are not known when mapping all bank memory ranges, which otherwise means
+ * not possible to exclude crash kernel range from creating block mappings
+ * so page-granularity mappings are created for the entire memory range.
+ * Hence a slightly slower boot is observed.
+ *
+ * Note: Page-granularity mapppings are necessary for crash kernel memory
+ * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
*/
-phys_addr_t arm64_dma_phys_limit __ro_after_init;
+#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
+phys_addr_t __ro_after_init arm64_dma_phys_limit;
+#else
+const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
+#endif
#ifdef CONFIG_KEXEC_CORE
/*
@@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
- if (!arm64_dma_phys_limit)
- arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
+
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
@@ -361,7 +388,8 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- reserve_crashkernel();
+ if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
memblock_dump_all();
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8..ed21bf83d0b7 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
+ if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -528,6 +528,17 @@ static void __init map_mem(pgd_t *pgdp)
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA) ||
+ IS_ENABLED(CONFIG_ZONE_DMA32))
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+ else if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+#endif
+
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -554,6 +565,25 @@ static void __init map_mem(pgd_t *pgdp)
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * in page granularity and put back unused memory to buddy system
+ * through /sys/kernel/kexec_crash_size interface.
+ */
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map &&
+ !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (crashk_res.end) {
+ __map_memblock(pgdp, crashk_res.start,
+ crashk_res.end + 1,
+ PAGE_KERNEL,
+ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+ }
+#endif
}
void mark_rodata_ro(void)
From: Ben Dooks <ben.dooks(a)codethink.co.uk>
commit a382c757ec5ef83137a86125f43a4c43dc2ab50b upstream.
The fu740 PCIe core does not probe any devices on the SiFive Unmatched
board without this fix (or having U-Boot explicitly start the PCIe via
either boot-script or user command). The fix is to start the link at
2.5GT/s and, once the link is up, change the maximum speed back to the
default.
The U-Boot driver claims to set the link-speed to 2.5GT/s to get the probe
to work (and U-Boot does print link up at 2.5GT/s) in the following code:
https://source.denx.de/u-boot/u-boot/-/blob/master/drivers/pci/pcie_dw_sifi…
Link: https://lore.kernel.org/r/20220318152430.526320-1-ben.dooks@codethink.co.uk
Signed-off-by: Ben Dooks <ben.dooks(a)codethink.co.uk>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Acked-by: Palmer Dabbelt <palmer(a)rivosinc.com>
Signed-off-by: Dimitri John Ledkov <dimitri.ledkov(a)canonical.com>
---
Please apply this patch to v5.15+ stable trees which fixes PCIe on
the very popular SiFive Unmatched RISC-V board.
drivers/pci/controller/dwc/pcie-fu740.c | 51 ++++++++++++++++++++++++-
1 file changed, 50 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/controller/dwc/pcie-fu740.c b/drivers/pci/controller/dwc/pcie-fu740.c
index 00cde9a248b5..78d002be4f82 100644
--- a/drivers/pci/controller/dwc/pcie-fu740.c
+++ b/drivers/pci/controller/dwc/pcie-fu740.c
@@ -181,10 +181,59 @@ static int fu740_pcie_start_link(struct dw_pcie *pci)
{
struct device *dev = pci->dev;
struct fu740_pcie *afp = dev_get_drvdata(dev);
+ u8 cap_exp = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+ int ret;
+ u32 orig, tmp;
+
+ /*
+ * Force 2.5GT/s when starting the link, due to some devices not
+ * probing at higher speeds. This happens with the PCIe switch
+ * on the Unmatched board when U-Boot has not initialised the PCIe.
+ * The fix in U-Boot is to force 2.5GT/s, which then gets cleared
+ * by the soft reset done by this driver.
+ */
+ dev_dbg(dev, "cap_exp at %x\n", cap_exp);
+ dw_pcie_dbi_ro_wr_en(pci);
+
+ tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
+ orig = tmp & PCI_EXP_LNKCAP_SLS;
+ tmp &= ~PCI_EXP_LNKCAP_SLS;
+ tmp |= PCI_EXP_LNKCAP_SLS_2_5GB;
+ dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
/* Enable LTSSM */
writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_LTSSM_ENABLE);
- return 0;
+
+ ret = dw_pcie_wait_for_link(pci);
+ if (ret) {
+ dev_err(dev, "error: link did not start\n");
+ goto err;
+ }
+
+ tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
+ if ((tmp & PCI_EXP_LNKCAP_SLS) != orig) {
+ dev_dbg(dev, "changing speed back to original\n");
+
+ tmp &= ~PCI_EXP_LNKCAP_SLS;
+ tmp |= orig;
+ dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
+
+ tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+ tmp |= PORT_LOGIC_SPEED_CHANGE;
+ dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp);
+
+ ret = dw_pcie_wait_for_link(pci);
+ if (ret) {
+ dev_err(dev, "error: link did not start at new speed\n");
+ goto err;
+ }
+ }
+
+ ret = 0;
+err:
+ WARN_ON(ret); /* we assume that errors will be very rare */
+ dw_pcie_dbi_ro_wr_dis(pci);
+ return ret;
}
static int fu740_pcie_host_init(struct pcie_port *pp)
--
2.32.0
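The clamp-and-restore in the fix operates on the Supported Link Speeds field
of the Link Capabilities register. A small sketch of just that bit
manipulation (the two PCI_EXP_LNKCAP_* values match
include/uapi/linux/pci_regs.h; clamp_link_speed() is an illustrative helper,
not a kernel function):

#include <stdint.h>

#define PCI_EXP_LNKCAP_SLS	 0x0000000f	/* Supported Link Speeds */
#define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001	/* 2.5GT/s */

/*
 * Clamp the advertised speed to 2.5GT/s for link training, saving the
 * original bits so the caller can restore them once the link is up.
 */
static uint32_t clamp_link_speed(uint32_t lnkcap, uint32_t *orig)
{
	*orig = lnkcap & PCI_EXP_LNKCAP_SLS;
	lnkcap &= ~PCI_EXP_LNKCAP_SLS;
	lnkcap |= PCI_EXP_LNKCAP_SLS_2_5GB;
	return lnkcap;
}

After dw_pcie_wait_for_link() succeeds, the hunk above writes the saved bits
back and retrains via PORT_LOGIC_SPEED_CHANGE.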
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c8e2a256915a223f6289f651d6b926cd7135c9e Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Date: Thu, 24 Mar 2022 18:14:09 -0700
Subject: [PATCH] mm: fix race between MADV_FREE reclaim and blkdev direct IO
read
Problem:
=======
Userspace might read the zero-page instead of actual data from a direct IO
read on a block device if madvise(MADV_FREE) has been called on the buffers
earlier (this is discussed below), due to a race between page reclaim on
MADV_FREE and blkdev direct IO read.
- Race condition:
==============
During page reclaim, the MADV_FREE page check in try_to_unmap_one() checks
if the page is not dirty, then discards its rmap PTE(s) (vs. remap back
if the page is dirty).
However, after try_to_unmap_one() returns to shrink_page_list(), it might
keep the page _anyway_ if page_ref_freeze() fails (it expects exactly
_one_ page reference, from the isolation for page reclaim).
Well, blkdev_direct_IO() gets references for all pages, and on READ
operations it only sets them dirty _later_.
So, if MADV_FREE'd pages (i.e., not dirty) are used as buffers for direct
IO read from block devices, and page reclaim happens during
__blkdev_direct_IO[_simple]() exactly AFTER bio_iov_iter_get_pages()
returns, but BEFORE the pages are set dirty, the situation happens.
The direct IO read eventually completes. Now, when userspace reads the
buffers, the PTE is no longer there and the page fault handler
do_anonymous_page() services that with the zero-page, NOT the data!
A synthetic reproducer is provided.
- Page faults:
===========
If page reclaim happens BEFORE bio_iov_iter_get_pages() the issue doesn't
happen, because that faults-in all pages as writeable, so
do_anonymous_page() sets up a new page/rmap/PTE, and that is used by
direct IO. The userspace reads don't fault as the PTE is there (thus
zero-page is not used/setup).
But if page reclaim happens AFTER it / BEFORE setting pages dirty, the PTE
is no longer there; the subsequent page faults can't help:
The data-read from the block device probably won't generate faults due to
DMA (no MMU) but even in the case it wouldn't use DMA, that happens on
different virtual addresses (not user-mapped addresses) because `struct
bio_vec` stores `struct page` to figure addresses out (which are different
from user-mapped addresses) for the read.
Thus userspace reads (to user-mapped addresses) still fault, then
do_anonymous_page() gets another `struct page` that would address/map to
other memory than the `struct page` used by `struct bio_vec` for the read.
(The original `struct page` is not available, since it wasn't freed, as
page_ref_freeze() failed due to more page refs. And even if it were
available, its data cannot be trusted anymore.)
Solution:
========
One solution is to check for the expected page reference count in
try_to_unmap_one().
There should be one reference from the isolation (that is also checked in
shrink_page_list() with page_ref_freeze()) plus one or more references
from page mapping(s) (put in discard: label). Further references mean
that rmap/PTE cannot be unmapped/nuked.
(Note: there might be more than one reference from mapping due to
fork()/clone() without CLONE_VM, which use the same `struct page` for
references, until the copy-on-write page gets copied.)
So, additional page references (e.g., from direct IO read) now prevent the
rmap/PTE from being unmapped/dropped, similarly to how the page is not freed
per shrink_page_list()/page_ref_freeze().
- Races and Barriers:
==================
The new check in try_to_unmap_one() should be safe in races with
bio_iov_iter_get_pages() in get_user_pages() fast and slow paths, as it's
done under the PTE lock.
The fast path doesn't take the lock, but it checks if the PTE has changed
and if so, it drops the reference and leaves the page for the slow path
(which does take that lock).
The fast path requires synchronization w/ full memory barrier: it writes
the page reference count first then it reads the PTE later, while
try_to_unmap() writes PTE first then it reads page refcount.
And a second barrier is needed, as the page dirty flag should not be read
before the page reference count (as in __remove_mapping()). (This can be
a load memory barrier only; no writes are involved.)
Call stack/comments:
- try_to_unmap_one()
- page_vma_mapped_walk()
- map_pte() # see pte_offset_map_lock():
pte_offset_map()
spin_lock()
- ptep_get_and_clear() # write PTE
- smp_mb() # (new barrier) GUP fast path
- page_ref_count() # (new check) read refcount
- page_vma_mapped_walk_done() # see pte_unmap_unlock():
pte_unmap()
spin_unlock()
- bio_iov_iter_get_pages()
- __bio_iov_iter_get_pages()
- iov_iter_get_pages()
- get_user_pages_fast()
- internal_get_user_pages_fast()
# fast path
- lockless_pages_from_mm()
- gup_{pgd,p4d,pud,pmd,pte}_range()
ptep = pte_offset_map() # not _lock()
pte = ptep_get_lockless(ptep)
page = pte_page(pte)
try_grab_compound_head(page) # inc refcount
# (RMW/barrier
# on success)
if (pte_val(pte) != pte_val(*ptep)) # read PTE
put_compound_head(page) # dec refcount
# go slow path
# slow path
- __gup_longterm_unlocked()
- get_user_pages_unlocked()
- __get_user_pages_locked()
- __get_user_pages()
- follow_{page,p4d,pud,pmd}_mask()
- follow_page_pte()
ptep = pte_offset_map_lock()
pte = *ptep
page = vm_normal_page(pte)
try_grab_page(page) # inc refcount
pte_unmap_unlock()
- Huge Pages:
==========
Regarding transparent hugepages, that logic shouldn't change, as MADV_FREE
(aka lazyfree) pages are PageAnon() && !PageSwapBacked()
(madvise_free_pte_range() -> mark_page_lazyfree() -> lru_lazyfree_fn())
thus should reach shrink_page_list() -> split_huge_page_to_list() before
try_to_unmap[_one](), so it deals with normal pages only.
(And in case unlikely/TTU_SPLIT_HUGE_PMD/split_huge_pmd_address() happens,
which should not happen or should be rare, the page refcount should be
greater than the mapcount: the head page is referenced by tail pages. That
also prevents checking the head `page` and then incorrectly calling
page_remove_rmap(subpage) for a tail page that isn't even in the
shrink_page_list()'s page_list (an effect of split huge pmd/pvmw), as it
might happen today in this unlikely
scenario.)
MADV_FREE'd buffers:
===================
So, back to the "if MADV_FREE pages are used as buffers" note. The case
is arguable, and subject to multiple interpretations.
The madvise(2) manual page on the MADV_FREE advice value says:
1) 'After a successful MADV_FREE ... data will be lost when
the kernel frees the pages.'
2) 'the free operation will be canceled if the caller writes
into the page' / 'subsequent writes ... will succeed and
then [the] kernel cannot free those dirtied pages'
3) 'If there is no subsequent write, the kernel can free the
pages at any time.'
Thoughts, questions, considerations... respectively:
1) Since the kernel didn't actually free the page (page_ref_freeze()
failed), should the data not have been lost? (on userspace read.)
2) Should writes performed by the direct IO read be able to cancel
the free operation?
- Should the direct IO read be considered as 'the caller' too,
as it's been requested by 'the caller'?
- Should the bio technique to dirty pages on return to userspace
(bio_check_pages_dirty() is called/used by __blkdev_direct_IO())
be considered in another/special way here?
3) Should an upcoming write from a previously requested direct IO
read be considered as a subsequent write, so the kernel should
not free the pages? (as it's known at the time of page reclaim.)
And lastly:
Technically, the last point would seem a reasonable consideration and
balance, as the madvise(2) manual page apparently (and fairly) seems to
assume that 'writes' are memory accesses from the userspace process (not
explicitly considering writes from the kernel or its corner cases; again,
fairly). Plus, the kernel fix implementation for the corner case of the
largely 'non-atomic write' encompassed by a direct IO read operation is
relatively simple; and it helps.
Reproducer:
==========
@ test.c (simplified, but works)
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
int main() {
int fd, i;
char *buf;
fd = open(DEV, O_RDONLY | O_DIRECT);
buf = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
buf[i] = 1; // init to non-zero
madvise(buf, BUF_SIZE, MADV_FREE);
read(fd, buf, BUF_SIZE);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
printf("%p: 0x%x\n", &buf[i], buf[i]);
return 0;
}
@ block/fops.c (formerly fs/block_dev.c)
+#include <linux/swap.h>
...
... __blkdev_direct_IO[_simple](...)
{
...
+ if (!strcmp(current->comm, "good"))
+ shrink_all_memory(ULONG_MAX);
+
ret = bio_iov_iter_get_pages(...);
+
+ if (!strcmp(current->comm, "bad"))
+ shrink_all_memory(ULONG_MAX);
...
}
@ shell
# NUM_PAGES=4
# PAGE_SIZE=$(getconf PAGE_SIZE)
# yes | dd of=test.img bs=${PAGE_SIZE} count=${NUM_PAGES}
# DEV=$(losetup -f --show test.img)
# gcc -DDEV=\"$DEV\" \
-DBUF_SIZE=$((PAGE_SIZE * NUM_PAGES)) \
-DPAGE_SIZE=${PAGE_SIZE} \
test.c -o test
# od -tx1 $DEV
0000000 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a
*
0040000
# mv test good
# ./good
0x7f7c10418000: 0x79
0x7f7c10419000: 0x79
0x7f7c1041a000: 0x79
0x7f7c1041b000: 0x79
# mv good bad
# ./bad
0x7fa1b8050000: 0x0
0x7fa1b8051000: 0x0
0x7fa1b8052000: 0x0
0x7fa1b8053000: 0x0
Note: the issue is consistent on v5.17-rc3, but it's intermittent with the
support of MADV_FREE on v4.5 (60%-70% error; needs swap). [wrap
do_direct_IO() in do_blockdev_direct_IO() @ fs/direct-io.c].
- v5.17-rc3:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x0
# free | grep Swap
Swap: 0 0 0
- v4.5:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
2702 0x0
1298 0x79
# swapoff -av
swapoff /swap
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
Ceph/TCMalloc:
=============
For documentation purposes, the use case driving the analysis/fix is Ceph
on Ubuntu 18.04, as the TCMalloc library there still uses MADV_FREE to
release unused memory to the system from the mmap'ed page heap (might be
committed back/used again; it's not munmap'ed.)
- PageHeap::DecommitSpan() -> TCMalloc_SystemRelease() -> madvise()
- PageHeap::CommitSpan() -> TCMalloc_SystemCommit() -> do nothing.
Note: TCMalloc switched back to MADV_DONTNEED a few commits after the
release in Ubuntu 18.04 (google-perftools/gperftools 2.5), so the issue
just 'disappeared' on Ceph on later Ubuntu releases but is still present
in the kernel, and can be hit by other use cases.
The observed issue seems to be the old Ceph bug #22464 [1], where checksum
mismatches are observed (and instrumentation with buffer dumps shows
zero-pages read from mmap'ed/MADV_FREE'd page ranges).
The issue in Ceph was reasonably deemed a kernel bug (comment #50) and
mostly worked around with a retry mechanism, but other parts of Ceph could
still hit that (rocksdb). Anyway, it's less likely to be hit again as
TCMalloc switched out of MADV_FREE by default.
(Some kernel versions/reports from the Ceph bug, and relation with
the MADV_FREE introduction/changes; TCMalloc versions not checked.)
- 4.4 good
- 4.5 (madv_free: introduction)
- 4.9 bad
- 4.10 good? maybe a swapless system
- 4.12 (madv_free: no longer free instantly on swapless systems)
- 4.13 bad
[1] https://tracker.ceph.com/issues/22464
Thanks:
======
Several people contributed to analysis/discussions/tests/reproducers in
the first stages when drilling down on ceph/tcmalloc/linux kernel:
- Dan Hill
- Dan Streetman
- Dongdong Tao
- Gavin Guo
- Gerald Yang
- Heitor Alves de Siqueira
- Ioanna Alifieraki
- Jay Vosburgh
- Matthew Ruffell
- Ponnuvel Palaniyappan
Reviews, suggestions, corrections, comments:
- Minchan Kim
- Yu Zhao
- Huang, Ying
- John Hubbard
- Christoph Hellwig
[mfo(a)canonical.com: v4]
Link: https://lkml.kernel.org/r/20220209202659.183418-1-mfo@canonical.com
Link: https://lkml.kernel.org/r/20220131230255.789059-1-mfo@canonical.com
Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages")
Signed-off-by: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Dan Hill <daniel.hill(a)canonical.com>
Cc: Dan Streetman <dan.streetman(a)canonical.com>
Cc: Dongdong Tao <dongdong.tao(a)canonical.com>
Cc: Gavin Guo <gavin.guo(a)canonical.com>
Cc: Gerald Yang <gerald.yang(a)canonical.com>
Cc: Heitor Alves de Siqueira <halves(a)canonical.com>
Cc: Ioanna Alifieraki <ioanna-maria.alifieraki(a)canonical.com>
Cc: Jay Vosburgh <jay.vosburgh(a)canonical.com>
Cc: Matthew Ruffell <matthew.ruffell(a)canonical.com>
Cc: Ponnuvel Palaniyappan <ponnuvel.palaniyappan(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Cc: Christoph Hellwig <hch(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/rmap.c b/mm/rmap.c
index bfcc8e3d412f..5cb970d51f0a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1588,7 +1588,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/* MADV_FREE page check */
if (!folio_test_swapbacked(folio)) {
- if (!folio_test_dirty(folio)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = folio_ref_count(folio);
+ map_count = folio_mapcount(folio);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !folio_test_dirty(folio)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
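The whole fix hinges on one invariant: a clean lazyfree page may only be
discarded when its only references are the reclaim isolation plus its
mappings. A standalone condensation of that check (plain counters in place
of the folio accessors; the barriers from the hunk above are reduced to a
comment):

#include <stdbool.h>

/*
 * One ref from reclaim's isolation plus one per mapping is the expected
 * total; any extra ref (e.g. a direct IO read still holding the page)
 * means the rmap/PTE must not be dropped. In the kernel this check sits
 * between smp_mb() and smp_rmb() to order the refcount read against
 * GUP-fast's PTE recheck and against the dirty-flag read.
 */
static bool can_discard_lazyfree(int ref_count, int map_count, bool dirty)
{
	return ref_count == 1 + map_count && !dirty;
}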
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c8e2a256915a223f6289f651d6b926cd7135c9e Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Date: Thu, 24 Mar 2022 18:14:09 -0700
Subject: [PATCH] mm: fix race between MADV_FREE reclaim and blkdev direct IO
read
Problem:
=======
Userspace might read the zero-page instead of actual data from a direct IO
read on a block device if madvise(MADV_FREE) has been called on the buffers
earlier (this is discussed below), due to a race between page reclaim on
MADV_FREE and blkdev direct IO read.
- Race condition:
==============
During page reclaim, the MADV_FREE page check in try_to_unmap_one() checks
if the page is not dirty, then discards its rmap PTE(s) (vs. remap back
if the page is dirty).
However, after try_to_unmap_one() returns to shrink_page_list(), it might
keep the page _anyway_ if page_ref_freeze() fails (it expects exactly
_one_ page reference, from the isolation for page reclaim).
Well, blkdev_direct_IO() gets references for all pages, and on READ
operations it only sets them dirty _later_.
So, if MADV_FREE'd pages (i.e., not dirty) are used as buffers for direct
IO read from block devices, and page reclaim happens during
__blkdev_direct_IO[_simple]() exactly AFTER bio_iov_iter_get_pages()
returns, but BEFORE the pages are set dirty, the situation happens.
The direct IO read eventually completes. Now, when userspace reads the
buffers, the PTE is no longer there and the page fault handler
do_anonymous_page() services that with the zero-page, NOT the data!
A synthetic reproducer is provided.
- Page faults:
===========
If page reclaim happens BEFORE bio_iov_iter_get_pages() the issue doesn't
happen, because that faults-in all pages as writeable, so
do_anonymous_page() sets up a new page/rmap/PTE, and that is used by
direct IO. The userspace reads don't fault as the PTE is there (thus
zero-page is not used/setup).
But if page reclaim happens AFTER it / BEFORE setting pages dirty, the PTE
is no longer there; the subsequent page faults can't help:
The data-read from the block device probably won't generate faults due to
DMA (no MMU) but even in the case it wouldn't use DMA, that happens on
different virtual addresses (not user-mapped addresses) because `struct
bio_vec` stores `struct page` to figure addresses out (which are different
from user-mapped addresses) for the read.
Thus userspace reads (to user-mapped addresses) still fault, then
do_anonymous_page() gets another `struct page` that would address/map to
other memory than the `struct page` used by `struct bio_vec` for the read.
(The original `struct page` is not available, since it wasn't freed, as
page_ref_freeze() failed due to more page refs. And even if it were
available, its data cannot be trusted anymore.)
Solution:
========
One solution is to check for the expected page reference count in
try_to_unmap_one().
There should be one reference from the isolation (that is also checked in
shrink_page_list() with page_ref_freeze()) plus one or more references
from page mapping(s) (put in discard: label). Further references mean
that rmap/PTE cannot be unmapped/nuked.
(Note: there might be more than one reference from mapping due to
fork()/clone() without CLONE_VM, which use the same `struct page` for
references, until the copy-on-write page gets copied.)
So, additional page references (e.g., from direct IO read) now prevent the
rmap/PTE from being unmapped/dropped, similarly to how the page is not freed
per shrink_page_list()/page_ref_freeze().
- Races and Barriers:
==================
The new check in try_to_unmap_one() should be safe in races with
bio_iov_iter_get_pages() in get_user_pages() fast and slow paths, as it's
done under the PTE lock.
The fast path doesn't take the lock, but it checks if the PTE has changed
and if so, it drops the reference and leaves the page for the slow path
(which does take that lock).
The fast path requires synchronization w/ full memory barrier: it writes
the page reference count first then it reads the PTE later, while
try_to_unmap() writes PTE first then it reads page refcount.
And a second barrier is needed, as the page dirty flag should not be read
before the page reference count (as in __remove_mapping()). (This can be
a load memory barrier only; no writes are involved.)
Call stack/comments:
- try_to_unmap_one()
- page_vma_mapped_walk()
- map_pte() # see pte_offset_map_lock():
pte_offset_map()
spin_lock()
- ptep_get_and_clear() # write PTE
- smp_mb() # (new barrier) GUP fast path
- page_ref_count() # (new check) read refcount
- page_vma_mapped_walk_done() # see pte_unmap_unlock():
pte_unmap()
spin_unlock()
- bio_iov_iter_get_pages()
- __bio_iov_iter_get_pages()
- iov_iter_get_pages()
- get_user_pages_fast()
- internal_get_user_pages_fast()
# fast path
- lockless_pages_from_mm()
- gup_{pgd,p4d,pud,pmd,pte}_range()
ptep = pte_offset_map() # not _lock()
pte = ptep_get_lockless(ptep)
page = pte_page(pte)
try_grab_compound_head(page) # inc refcount
# (RMW/barrier
# on success)
if (pte_val(pte) != pte_val(*ptep)) # read PTE
put_compound_head(page) # dec refcount
# go slow path
# slow path
- __gup_longterm_unlocked()
- get_user_pages_unlocked()
- __get_user_pages_locked()
- __get_user_pages()
- follow_{page,p4d,pud,pmd}_mask()
- follow_page_pte()
ptep = pte_offset_map_lock()
pte = *ptep
page = vm_normal_page(pte)
try_grab_page(page) # inc refcount
pte_unmap_unlock()
- Huge Pages:
==========
Regarding transparent hugepages, that logic shouldn't change, as MADV_FREE
(aka lazyfree) pages are PageAnon() && !PageSwapBacked()
(madvise_free_pte_range() -> mark_page_lazyfree() -> lru_lazyfree_fn())
thus should reach shrink_page_list() -> split_huge_page_to_list() before
try_to_unmap[_one](), so it deals with normal pages only.
(And in case unlikely/TTU_SPLIT_HUGE_PMD/split_huge_pmd_address() happens,
which should not happen or should be rare, the page refcount should be
greater than the mapcount: the head page is referenced by tail pages. That
also prevents checking the head `page` and then incorrectly calling
page_remove_rmap(subpage) for a tail page that isn't even in the
shrink_page_list()'s page_list (an effect of split huge pmd/pvmw), as it
might happen today in this unlikely
scenario.)
MADV_FREE'd buffers:
===================
So, back to the "if MADV_FREE pages are used as buffers" note. The case
is arguable, and subject to multiple interpretations.
The madvise(2) manual page on the MADV_FREE advice value says:
1) 'After a successful MADV_FREE ... data will be lost when
the kernel frees the pages.'
2) 'the free operation will be canceled if the caller writes
into the page' / 'subsequent writes ... will succeed and
then [the] kernel cannot free those dirtied pages'
3) 'If there is no subsequent write, the kernel can free the
pages at any time.'
Thoughts, questions, considerations... respectively:
1) Since the kernel didn't actually free the page (page_ref_freeze()
failed), should the data not have been lost? (on userspace read.)
2) Should writes performed by the direct IO read be able to cancel
the free operation?
- Should the direct IO read be considered as 'the caller' too,
as it's been requested by 'the caller'?
- Should the bio technique to dirty pages on return to userspace
(bio_check_pages_dirty() is called/used by __blkdev_direct_IO())
be considered in another/special way here?
3) Should an upcoming write from a previously requested direct IO
read be considered as a subsequent write, so the kernel should
not free the pages? (as it's known at the time of page reclaim.)
And lastly:
Technically, the last point would seem a reasonable consideration and
balance, as the madvise(2) manual page apparently (and fairly) seems to
assume that 'writes' are memory accesses from the userspace process (not
explicitly considering writes from the kernel or its corner cases; again,
fairly). Plus, the kernel fix implementation for the corner case of the
largely 'non-atomic write' encompassed by a direct IO read operation is
relatively simple; and it helps.
Reproducer:
==========
@ test.c (simplified, but works)
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
int main() {
int fd, i;
char *buf;
fd = open(DEV, O_RDONLY | O_DIRECT);
buf = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
buf[i] = 1; // init to non-zero
madvise(buf, BUF_SIZE, MADV_FREE);
read(fd, buf, BUF_SIZE);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
printf("%p: 0x%x\n", &buf[i], buf[i]);
return 0;
}
@ block/fops.c (formerly fs/block_dev.c)
+#include <linux/swap.h>
...
... __blkdev_direct_IO[_simple](...)
{
...
+ if (!strcmp(current->comm, "good"))
+ shrink_all_memory(ULONG_MAX);
+
ret = bio_iov_iter_get_pages(...);
+
+ if (!strcmp(current->comm, "bad"))
+ shrink_all_memory(ULONG_MAX);
...
}
@ shell
# NUM_PAGES=4
# PAGE_SIZE=$(getconf PAGE_SIZE)
# yes | dd of=test.img bs=${PAGE_SIZE} count=${NUM_PAGES}
# DEV=$(losetup -f --show test.img)
# gcc -DDEV=\"$DEV\" \
-DBUF_SIZE=$((PAGE_SIZE * NUM_PAGES)) \
-DPAGE_SIZE=${PAGE_SIZE} \
test.c -o test
# od -tx1 $DEV
0000000 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a
*
0040000
# mv test good
# ./good
0x7f7c10418000: 0x79
0x7f7c10419000: 0x79
0x7f7c1041a000: 0x79
0x7f7c1041b000: 0x79
# mv good bad
# ./bad
0x7fa1b8050000: 0x0
0x7fa1b8051000: 0x0
0x7fa1b8052000: 0x0
0x7fa1b8053000: 0x0
Note: the issue is consistent on v5.17-rc3, but it's intermittent with the
support of MADV_FREE on v4.5 (60%-70% error; needs swap). [wrap
do_direct_IO() in do_blockdev_direct_IO() @ fs/direct-io.c].
- v5.17-rc3:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x0
# free | grep Swap
Swap: 0 0 0
- v4.5:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
2702 0x0
1298 0x79
# swapoff -av
swapoff /swap
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
Ceph/TCMalloc:
=============
For documentation purposes, the use case driving the analysis/fix is Ceph
on Ubuntu 18.04, as the TCMalloc library there still uses MADV_FREE to
release unused memory to the system from the mmap'ed page heap (might be
committed back/used again; it's not munmap'ed.)
- PageHeap::DecommitSpan() -> TCMalloc_SystemRelease() -> madvise()
- PageHeap::CommitSpan() -> TCMalloc_SystemCommit() -> do nothing.
Note: TCMalloc switched back to MADV_DONTNEED a few commits after the
release in Ubuntu 18.04 (google-perftools/gperftools 2.5), so the issue
just 'disappeared' on Ceph on later Ubuntu releases but is still present
in the kernel, and can be hit by other use cases.
The observed issue seems to be the old Ceph bug #22464 [1], where checksum
mismatches are observed (and instrumentation with buffer dumps shows
zero-pages read from mmap'ed/MADV_FREE'd page ranges).
The issue in Ceph was reasonably deemed a kernel bug (comment #50) and
mostly worked around with a retry mechanism, but other parts of Ceph could
still hit that (rocksdb). Anyway, it's less likely to be hit again as
TCMalloc switched out of MADV_FREE by default.
(Some kernel versions/reports from the Ceph bug, and relation with
the MADV_FREE introduction/changes; TCMalloc versions not checked.)
- 4.4 good
- 4.5 (madv_free: introduction)
- 4.9 bad
- 4.10 good? maybe a swapless system
- 4.12 (madv_free: no longer free instantly on swapless systems)
- 4.13 bad
[1] https://tracker.ceph.com/issues/22464
Thanks:
======
Several people contributed to analysis/discussions/tests/reproducers in
the first stages when drilling down on ceph/tcmalloc/linux kernel:
- Dan Hill
- Dan Streetman
- Dongdong Tao
- Gavin Guo
- Gerald Yang
- Heitor Alves de Siqueira
- Ioanna Alifieraki
- Jay Vosburgh
- Matthew Ruffell
- Ponnuvel Palaniyappan
Reviews, suggestions, corrections, comments:
- Minchan Kim
- Yu Zhao
- Huang, Ying
- John Hubbard
- Christoph Hellwig
[mfo(a)canonical.com: v4]
Link: https://lkml.kernel.org/r/20220209202659.183418-1-mfo@canonical.com
Link: https://lkml.kernel.org/r/20220131230255.789059-1-mfo@canonical.com
Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages")
Signed-off-by: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Dan Hill <daniel.hill(a)canonical.com>
Cc: Dan Streetman <dan.streetman(a)canonical.com>
Cc: Dongdong Tao <dongdong.tao(a)canonical.com>
Cc: Gavin Guo <gavin.guo(a)canonical.com>
Cc: Gerald Yang <gerald.yang(a)canonical.com>
Cc: Heitor Alves de Siqueira <halves(a)canonical.com>
Cc: Ioanna Alifieraki <ioanna-maria.alifieraki(a)canonical.com>
Cc: Jay Vosburgh <jay.vosburgh(a)canonical.com>
Cc: Matthew Ruffell <matthew.ruffell(a)canonical.com>
Cc: Ponnuvel Palaniyappan <ponnuvel.palaniyappan(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Cc: Christoph Hellwig <hch(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/rmap.c b/mm/rmap.c
index bfcc8e3d412f..5cb970d51f0a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1588,7 +1588,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/* MADV_FREE page check */
if (!folio_test_swapbacked(folio)) {
- if (!folio_test_dirty(folio)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = folio_ref_count(folio);
+ map_count = folio_mapcount(folio);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !folio_test_dirty(folio)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c8e2a256915a223f6289f651d6b926cd7135c9e Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Date: Thu, 24 Mar 2022 18:14:09 -0700
Subject: [PATCH] mm: fix race between MADV_FREE reclaim and blkdev direct IO
read
Problem:
=======
Userspace might read the zero-page instead of actual data from a direct IO
read on a block device if madvise(MADV_FREE) has been called on the buffers
earlier (this is discussed below), due to a race between page reclaim on
MADV_FREE and blkdev direct IO read.
- Race condition:
==============
During page reclaim, the MADV_FREE page check in try_to_unmap_one() checks
if the page is not dirty, then discards its rmap PTE(s) (vs. remap back
if the page is dirty).
However, after try_to_unmap_one() returns to shrink_page_list(), it might
keep the page _anyway_ if page_ref_freeze() fails (it expects exactly
_one_ page reference, from the isolation for page reclaim).
Well, blkdev_direct_IO() gets references for all pages, and on READ
operations it only sets them dirty _later_.
So, if MADV_FREE'd pages (i.e., not dirty) are used as buffers for direct
IO read from block devices, and page reclaim happens during
__blkdev_direct_IO[_simple]() exactly AFTER bio_iov_iter_get_pages()
returns, but BEFORE the pages are set dirty, the situation happens.
The direct IO read eventually completes. Now, when userspace reads the
buffers, the PTE is no longer there and the page fault handler
do_anonymous_page() services that with the zero-page, NOT the data!
A synthetic reproducer is provided.
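A simplified timeline of the race window (illustrative only, condensed
from the description above):
- direct IO read:  bio_iov_iter_get_pages()         # takes page refs
- page reclaim:    try_to_unmap_one()               # clean -> PTE discarded
- page reclaim:    page_ref_freeze() fails          # extra refs -> page kept
- direct IO read:  completes, sets pages dirty      # too late
- userspace read:  page fault -> do_anonymous_page() -> zero-page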
- Page faults:
===========
If page reclaim happens BEFORE bio_iov_iter_get_pages(), the issue doesn't
happen, because that faults in all pages as writeable, so
do_anonymous_page() sets up a new page/rmap/PTE, and that is used by
direct IO. The userspace reads don't fault, as the PTE is there (thus the
zero-page is not used/set up).
But if page reclaim happens AFTER bio_iov_iter_get_pages() and BEFORE the
pages are set dirty, the PTE is no longer there; the subsequent page
faults can't help:
The data read from the block device probably won't generate faults due to
DMA (which bypasses the MMU), but even if DMA weren't used, the read
happens at different virtual addresses (not user-mapped addresses),
because `struct bio_vec` stores `struct page` pointers to figure the
addresses out for the read (and those differ from the user-mapped
addresses).
Thus userspace reads (at user-mapped addresses) still fault; then
do_anonymous_page() gets another `struct page`, which addresses/maps
other memory than the `struct page` used by `struct bio_vec` for the read.
(The original `struct page` is not available, since it wasn't freed, as
page_ref_freeze() failed due to more page refs. And even if it were
available, its data cannot be trusted anymore.)
Solution:
========
One solution is to check for the expected page reference count in
try_to_unmap_one().
There should be one reference from the isolation (that is also checked in
shrink_page_list() with page_ref_freeze()) plus one or more references
from page mapping(s) (put in discard: label). Further references mean
that rmap/PTE cannot be unmapped/nuked.
(Note: there might be more than one reference from mapping due to
fork()/clone() without CLONE_VM, which use the same `struct page` for
references, until the copy-on-write page gets copied.)
So, additional page references (e.g., from a direct IO read) now prevent
the rmap/PTE from being unmapped/dropped, similarly to how they prevent
the page from being freed by shrink_page_list()/page_ref_freeze().
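As a worked example with hypothetical counts: a lazyfree page mapped by
two processes (fork() without CLONE_VM), isolated for reclaim, with an
in-flight direct IO read holding one more reference, has:
    ref_count = 1 (isolation) + 2 (mappings) + 1 (direct IO) = 4
    1 + map_count = 1 + 2 = 3
so the counts don't match and the rmap/PTEs are kept; without the direct
IO reference, ref_count would be 3 == 1 + map_count, and the unmap could
proceed.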
- Races and Barriers:
==================
The new check in try_to_unmap_one() should be safe in races with
bio_iov_iter_get_pages() in get_user_pages() fast and slow paths, as it's
done under the PTE lock.
The fast path doesn't take the lock, but it checks if the PTE has changed
and if so, it drops the reference and leaves the page for the slow path
(which does take that lock).
The fast path requires synchronization with a full memory barrier: it
writes the page reference count first and reads the PTE later, while
try_to_unmap() writes the PTE first and reads the page refcount later.
And a second barrier is needed, as the page dirty flag should not be read
before the page reference count (as in __remove_mapping()). (This can be
a load memory barrier only; no writes are involved.)
Call stack/comments:
- try_to_unmap_one()
- page_vma_mapped_walk()
- map_pte() # see pte_offset_map_lock():
pte_offset_map()
spin_lock()
- ptep_get_and_clear() # write PTE
- smp_mb() # (new barrier) GUP fast path
- page_ref_count() # (new check) read refcount
- page_vma_mapped_walk_done() # see pte_unmap_unlock():
pte_unmap()
spin_unlock()
- bio_iov_iter_get_pages()
- __bio_iov_iter_get_pages()
- iov_iter_get_pages()
- get_user_pages_fast()
- internal_get_user_pages_fast()
# fast path
- lockless_pages_from_mm()
- gup_{pgd,p4d,pud,pmd,pte}_range()
ptep = pte_offset_map() # not _lock()
pte = ptep_get_lockless(ptep)
page = pte_page(pte)
try_grab_compound_head(page) # inc refcount
# (RMW/barrier
# on success)
if (pte_val(pte) != pte_val(*ptep)) # read PTE
put_compound_head(page) # dec refcount
# go slow path
# slow path
- __gup_longterm_unlocked()
- get_user_pages_unlocked()
- __get_user_pages_locked()
- __get_user_pages()
- follow_{page,p4d,pud,pmd}_mask()
- follow_page_pte()
ptep = pte_offset_map_lock()
pte = *ptep
page = vm_normal_page(pte)
try_grab_page(page) # inc refcount
pte_unmap_unlock()
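Schematically, the pairing looks like this (a condensed sketch of the
call stacks above, not verbatim kernel code):
    reclaim (try_to_unmap_one)        GUP fast (gup_pte_range)
    --------------------------        ------------------------
    clear PTE                         inc page refcount
    smp_mb()                          # full barrier implied by the
    read page refcount                #  atomic RMW on success
                                      re-read PTE; if it changed:
                                        dec refcount, go to slow path
At least one side must observe the other's write: either reclaim sees the
elevated refcount and keeps the rmap/PTE, or the GUP fast path sees the
cleared PTE and falls back to the slow path, which serializes with
try_to_unmap_one() on the PTE lock.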
- Huge Pages:
==========
Regarding transparent hugepages, that logic shouldn't change, as MADV_FREE
(aka lazyfree) pages are PageAnon() && !PageSwapBacked()
(madvise_free_pte_range() -> mark_page_lazyfree() -> lru_lazyfree_fn())
thus should reach shrink_page_list() -> split_huge_page_to_list() before
try_to_unmap[_one](), so it deals with normal pages only.
(And in the unlikely case that TTU_SPLIT_HUGE_PMD/split_huge_pmd_address()
happens, which should not happen or should be rare, the page refcount
should be greater than the mapcount: the head page is referenced by the
tail pages. That also prevents checking the head `page` and then
incorrectly calling page_remove_rmap(subpage) for a tail page that isn't
even in shrink_page_list()'s page_list (an effect of split huge pmd/pvmw),
as might happen today in this unlikely scenario.)
MADV_FREE'd buffers:
===================
So, back to the "if MADV_FREE pages are used as buffers" note. The case
is arguable, and subject to multiple interpretations.
The madvise(2) manual page on the MADV_FREE advice value says:
1) 'After a successful MADV_FREE ... data will be lost when
the kernel frees the pages.'
2) 'the free operation will be canceled if the caller writes
into the page' / 'subsequent writes ... will succeed and
then [the] kernel cannot free those dirtied pages'
3) 'If there is no subsequent write, the kernel can free the
pages at any time.'
Thoughts, questions, considerations... respectively:
1) Since the kernel didn't actually free the page (page_ref_freeze()
failed), should the data not have been lost? (on userspace read.)
2) Should writes performed by the direct IO read be able to cancel
the free operation?
- Should the direct IO read be considered as 'the caller' too,
as it's been requested by 'the caller'?
- Should the bio technique to dirty pages on return to userspace
(bio_check_pages_dirty() is called/used by __blkdev_direct_IO())
be considered in another/special way here?
3) Should an upcoming write from a previously requested direct IO
read be considered as a subsequent write, so the kernel should
not free the pages? (as it's known at the time of page reclaim.)
And lastly:
Technically, the last point would seem a reasonable consideration and
balance, as the madvise(2) manual page apparently (and fairly) seems to
assume that 'writes' are memory accesses from the userspace process (not
explicitly considering writes from the kernel or its corner cases; again,
fairly). Plus, the kernel fix for this corner case of the largely
'non-atomic write' encompassed by a direct IO read operation is relatively
simple; and it helps.
Reproducer:
==========
@ test.c (simplified, but works)
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
int main() {
int fd, i;
char *buf;
fd = open(DEV, O_RDONLY | O_DIRECT);
buf = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
buf[i] = 1; // init to non-zero
madvise(buf, BUF_SIZE, MADV_FREE);
read(fd, buf, BUF_SIZE);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
printf("%p: 0x%x\n", &buf[i], buf[i]);
return 0;
}
@ block/fops.c (formerly fs/block_dev.c)
+#include <linux/swap.h>
...
... __blkdev_direct_IO[_simple](...)
{
...
+ if (!strcmp(current->comm, "good"))
+ shrink_all_memory(ULONG_MAX);
+
ret = bio_iov_iter_get_pages(...);
+
+ if (!strcmp(current->comm, "bad"))
+ shrink_all_memory(ULONG_MAX);
...
}
@ shell
# NUM_PAGES=4
# PAGE_SIZE=$(getconf PAGE_SIZE)
# yes | dd of=test.img bs=${PAGE_SIZE} count=${NUM_PAGES}
# DEV=$(losetup -f --show test.img)
# gcc -DDEV=\"$DEV\" \
-DBUF_SIZE=$((PAGE_SIZE * NUM_PAGES)) \
-DPAGE_SIZE=${PAGE_SIZE} \
test.c -o test
# od -tx1 $DEV
0000000 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a
*
0040000
# mv test good
# ./good
0x7f7c10418000: 0x79
0x7f7c10419000: 0x79
0x7f7c1041a000: 0x79
0x7f7c1041b000: 0x79
# mv good bad
# ./bad
0x7fa1b8050000: 0x0
0x7fa1b8051000: 0x0
0x7fa1b8052000: 0x0
0x7fa1b8053000: 0x0
Note: the issue is consistently reproducible on v5.17-rc3, but it's
intermittent with the original MADV_FREE support on v4.5 (60%-70% error
rate; needs swap). [There, wrap do_direct_IO() in do_blockdev_direct_IO()
@ fs/direct-io.c.]
- v5.17-rc3:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x0
# free | grep Swap
Swap: 0 0 0
- v4.5:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
2702 0x0
1298 0x79
# swapoff -av
swapoff /swap
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
Ceph/TCMalloc:
=============
For documentation purposes, the use case driving the analysis/fix is Ceph
on Ubuntu 18.04, as the TCMalloc library there still uses MADV_FREE to
release unused memory to the system from the mmap'ed page heap (it might
be committed back/used again; it's not munmap'ed):
- PageHeap::DecommitSpan() -> TCMalloc_SystemRelease() -> madvise()
- PageHeap::CommitSpan() -> TCMalloc_SystemCommit() -> do nothing
Note: TCMalloc switched back to MADV_DONTNEED a few commits after the
release in Ubuntu 18.04 (google-perftools/gperftools 2.5), so the issue
just 'disappeared' on Ceph on later Ubuntu releases but is still present
in the kernel, and can be hit by other use cases.
The observed issue seems to be the old Ceph bug #22464 [1], where checksum
mismatches are observed (and instrumentation with buffer dumps shows
zero-pages read from mmap'ed/MADV_FREE'd page ranges).
The issue in Ceph was reasonably deemed a kernel bug (comment #50) and
mostly worked around with a retry mechanism, but other parts of Ceph
(e.g., rocksdb) could still hit it. Anyway, it's less likely to be hit
again as TCMalloc switched away from MADV_FREE by default.
(Some kernel versions/reports from the Ceph bug, and relation with
the MADV_FREE introduction/changes; TCMalloc versions not checked.)
- 4.4 good
- 4.5 (madv_free: introduction)
- 4.9 bad
- 4.10 good? maybe a swapless system
- 4.12 (madv_free: no longer free instantly on swapless systems)
- 4.13 bad
[1] https://tracker.ceph.com/issues/22464
Thanks:
======
Several people contributed to analysis/discussions/tests/reproducers in
the first stages when drilling down on ceph/tcmalloc/linux kernel:
- Dan Hill
- Dan Streetman
- Dongdong Tao
- Gavin Guo
- Gerald Yang
- Heitor Alves de Siqueira
- Ioanna Alifieraki
- Jay Vosburgh
- Matthew Ruffell
- Ponnuvel Palaniyappan
Reviews, suggestions, corrections, comments:
- Minchan Kim
- Yu Zhao
- Huang, Ying
- John Hubbard
- Christoph Hellwig
[mfo(a)canonical.com: v4]
Link: https://lkml.kernel.org/r/20220209202659.183418-1-mfo@canonical.com
Link: https://lkml.kernel.org/r/20220131230255.789059-1-mfo@canonical.com
Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages")
Signed-off-by: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Dan Hill <daniel.hill(a)canonical.com>
Cc: Dan Streetman <dan.streetman(a)canonical.com>
Cc: Dongdong Tao <dongdong.tao(a)canonical.com>
Cc: Gavin Guo <gavin.guo(a)canonical.com>
Cc: Gerald Yang <gerald.yang(a)canonical.com>
Cc: Heitor Alves de Siqueira <halves(a)canonical.com>
Cc: Ioanna Alifieraki <ioanna-maria.alifieraki(a)canonical.com>
Cc: Jay Vosburgh <jay.vosburgh(a)canonical.com>
Cc: Matthew Ruffell <matthew.ruffell(a)canonical.com>
Cc: Ponnuvel Palaniyappan <ponnuvel.palaniyappan(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Cc: Christoph Hellwig <hch(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/rmap.c b/mm/rmap.c
index bfcc8e3d412f..5cb970d51f0a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1588,7 +1588,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/* MADV_FREE page check */
if (!folio_test_swapbacked(folio)) {
- if (!folio_test_dirty(folio)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = folio_ref_count(folio);
+ map_count = folio_mapcount(folio);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !folio_test_dirty(folio)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c8e2a256915a223f6289f651d6b926cd7135c9e Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Date: Thu, 24 Mar 2022 18:14:09 -0700
Subject: [PATCH] mm: fix race between MADV_FREE reclaim and blkdev direct IO
read
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c8e2a256915a223f6289f651d6b926cd7135c9e Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Date: Thu, 24 Mar 2022 18:14:09 -0700
Subject: [PATCH] mm: fix race between MADV_FREE reclaim and blkdev direct IO
read
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c8e2a256915a223f6289f651d6b926cd7135c9e Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Date: Thu, 24 Mar 2022 18:14:09 -0700
Subject: [PATCH] mm: fix race between MADV_FREE reclaim and blkdev direct IO
read
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c8e2a256915a223f6289f651d6b926cd7135c9e Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Date: Thu, 24 Mar 2022 18:14:09 -0700
Subject: [PATCH] mm: fix race between MADV_FREE reclaim and blkdev direct IO
read
Problem:
=======
Userspace might read the zero-page instead of actual data from a direct IO
read on a block device if madvise(MADV_FREE) was called on the buffers
earlier (this is discussed below), due to a race between page reclaim on
MADV_FREE and blkdev direct IO read.
- Race condition:
==============
During page reclaim, the MADV_FREE page check in try_to_unmap_one() checks
whether the page is not dirty, then discards its rmap PTE(s) (vs. remapping
them back if the page is dirty).
However, after try_to_unmap_one() returns to shrink_page_list(), it might
keep the page _anyway_ if page_ref_freeze() fails (it expects exactly
_one_ page reference, from the isolation for page reclaim).
Well, blkdev_direct_IO() gets references for all pages, and on READ
operations it only sets them dirty _later_.
So, if MADV_FREE'd pages (i.e., not dirty) are used as buffers for direct
IO read from block devices, and page reclaim happens during
__blkdev_direct_IO[_simple]() exactly AFTER bio_iov_iter_get_pages()
returns, but BEFORE the pages are set dirty, the situation happens.
The direct IO read eventually completes. Now, when userspace reads the
buffers, the PTE is no longer there and the page fault handler
do_anonymous_page() services that with the zero-page, NOT the data!
A synthetic reproducer is provided.
- Page faults:
===========
If page reclaim happens BEFORE bio_iov_iter_get_pages(), the issue doesn't
happen, because that faults in all pages as writeable, so
do_anonymous_page() sets up a new page/rmap/PTE, and that is used by
direct IO. The userspace reads don't fault as the PTE is there (thus the
zero-page is not used/set up).
But if page reclaim happens AFTER it / BEFORE setting pages dirty, the PTE
is no longer there; the subsequent page faults can't help:
The data read from the block device probably won't generate faults, due to
DMA (no MMU involvement); but even in the case it wouldn't use DMA, the
read happens on different virtual addresses (not user-mapped addresses),
because `struct bio_vec` stores `struct page` pointers to figure the
addresses out (which are different from the user-mapped addresses) for the
read.
Thus userspace reads (to user-mapped addresses) still fault; then
do_anonymous_page() gets another `struct page` that would address/map to
other memory than the `struct page` used by `struct bio_vec` for the read.
(The original `struct page` is not available, since it wasn't freed, as
page_ref_freeze() failed due to more page refs. And even if it were
available, its data cannot be trusted anymore.)
Solution:
========
One solution is to check for the expected page reference count in
try_to_unmap_one().
There should be one reference from the isolation (that is also checked in
shrink_page_list() with page_ref_freeze()) plus one or more references
from page mapping(s) (dropped at the discard: label). Further references
mean that the rmap/PTE cannot be unmapped/nuked.
(Note: there might be more than one reference from mapping due to
fork()/clone() without CLONE_VM, which use the same `struct page` for
references, until the copy-on-write page gets copied.)
So, additional page references (e.g., from direct IO read) now prevent the
rmap/PTE from being unmapped/dropped, similarly to how the page is not
freed by shrink_page_list()/page_ref_freeze().
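To make the expected-count arithmetic concrete, a small illustration
(the counts are assumed for a simple anonymous page, not taken verbatim
from the patch):
    /*
     * Illustration only; counts assumed for a simple anon page.
     *
     * Isolated lazyfree page, single mapping, no other users:
     *   ref_count == 2 (isolation + rmap), map_count == 1
     *   ref_count == 1 + map_count  ->  safe to drop the rmap/PTE
     *
     * Same page with a direct IO read in flight (extra GUP ref):
     *   ref_count == 3 (isolation + rmap + GUP), map_count == 1
     *   ref_count != 1 + map_count  ->  keep the rmap/PTE
     */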
- Races and Barriers:
==================
The new check in try_to_unmap_one() should be safe in races with
bio_iov_iter_get_pages() in get_user_pages() fast and slow paths, as it's
done under the PTE lock.
The fast path doesn't take the lock, but it checks if the PTE has changed
and if so, it drops the reference and leaves the page for the slow path
(which does take that lock).
The fast path requires synchronization with a full memory barrier: it
writes the page reference count first and reads the PTE later, while
try_to_unmap() writes the PTE first and reads the page refcount later.
And a second barrier is needed, as the page dirty flag should not be read
before the page reference count (as in __remove_mapping()). (This can be
a load memory barrier only; no writes are involved.)
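Schematically, the pairing described above (a sketch, not code from the
patch):
    reclaim (try_to_unmap_one)      GUP fast path (gup_pte_range)
    --------------------------      -----------------------------
    clear PTE                       inc page refcount
    smp_mb()                        full barrier (atomic RMW)
    read refcount, mapcount         re-read PTE
    smp_rmb()                       changed? -> put ref, go slow path
    read dirty flag
Either reclaim observes the raised refcount, or the GUP fast path observes
the cleared PTE and falls back to the slow path (which takes the PTE lock);
the two sides cannot both miss each other.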
Call stack/comments:
- try_to_unmap_one()
- page_vma_mapped_walk()
- map_pte() # see pte_offset_map_lock():
pte_offset_map()
spin_lock()
- ptep_get_and_clear() # write PTE
- smp_mb() # (new barrier) GUP fast path
- page_ref_count() # (new check) read refcount
- page_vma_mapped_walk_done() # see pte_unmap_unlock():
pte_unmap()
spin_unlock()
- bio_iov_iter_get_pages()
- __bio_iov_iter_get_pages()
- iov_iter_get_pages()
- get_user_pages_fast()
- internal_get_user_pages_fast()
# fast path
- lockless_pages_from_mm()
- gup_{pgd,p4d,pud,pmd,pte}_range()
ptep = pte_offset_map() # not _lock()
pte = ptep_get_lockless(ptep)
page = pte_page(pte)
try_grab_compound_head(page) # inc refcount
# (RMW/barrier
# on success)
if (pte_val(pte) != pte_val(*ptep)) # read PTE
put_compound_head(page) # dec refcount
# go slow path
# slow path
- __gup_longterm_unlocked()
- get_user_pages_unlocked()
- __get_user_pages_locked()
- __get_user_pages()
- follow_{page,p4d,pud,pmd}_mask()
- follow_page_pte()
ptep = pte_offset_map_lock()
pte = *ptep
page = vm_normal_page(pte)
try_grab_page(page) # inc refcount
pte_unmap_unlock()
- Huge Pages:
==========
Regarding transparent hugepages, that logic shouldn't change, as MADV_FREE
(aka lazyfree) pages are PageAnon() && !PageSwapBacked()
(madvise_free_pte_range() -> mark_page_lazyfree() -> lru_lazyfree_fn())
thus should reach shrink_page_list() -> split_huge_page_to_list() before
try_to_unmap[_one](), so it deals with normal pages only.
(And in the unlikely case that TTU_SPLIT_HUGE_PMD/split_huge_pmd_address()
happens, which should not happen or should be rare, the page refcount would
be greater than the mapcount: the head page is referenced by the tail
pages. That also prevents checking the head `page` and then incorrectly
calling page_remove_rmap(subpage) for a tail page that isn't even in
shrink_page_list()'s page_list (an effect of split huge pmd/pvmw), as might
happen today in this unlikely scenario.)
MADV_FREE'd buffers:
===================
So, back to the "if MADV_FREE pages are used as buffers" note. The case
is arguable, and subject to multiple interpretations.
The madvise(2) manual page on the MADV_FREE advice value says:
1) 'After a successful MADV_FREE ... data will be lost when
the kernel frees the pages.'
2) 'the free operation will be canceled if the caller writes
into the page' / 'subsequent writes ... will succeed and
then [the] kernel cannot free those dirtied pages'
3) 'If there is no subsequent write, the kernel can free the
pages at any time.'
Thoughts, questions, considerations... respectively:
1) Since the kernel didn't actually free the page (page_ref_freeze()
failed), should the data not have been lost? (on userspace read.)
2) Should writes performed by the direct IO read be able to cancel
the free operation?
- Should the direct IO read be considered as 'the caller' too,
as it's been requested by 'the caller'?
- Should the bio technique to dirty pages on return to userspace
(bio_check_pages_dirty() is called/used by __blkdev_direct_IO())
be considered in another/special way here?
3) Should an upcoming write from a previously requested direct IO
read be considered as a subsequent write, so the kernel should
not free the pages? (as it's known at the time of page reclaim.)
And lastly:
Technically, the last point seems a reasonable consideration and balance,
as the madvise(2) manual page apparently (and fairly) seems to assume that
'writes' are memory accesses from the userspace process (not explicitly
considering writes from the kernel or its corner cases; again, fairly).
Plus, the kernel fix for the corner case of the largely 'non-atomic write'
encompassed by a direct IO read operation is relatively simple, and it
helps.
Reproducer:
==========
@ test.c (simplified, but works)
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
int main() {
int fd, i;
char *buf;
fd = open(DEV, O_RDONLY | O_DIRECT);
buf = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
buf[i] = 1; // init to non-zero
madvise(buf, BUF_SIZE, MADV_FREE);
read(fd, buf, BUF_SIZE);
for (i = 0; i < BUF_SIZE; i += PAGE_SIZE)
printf("%p: 0x%x\n", &buf[i], buf[i]);
return 0;
}
@ block/fops.c (formerly fs/block_dev.c)
+#include <linux/swap.h>
...
... __blkdev_direct_IO[_simple](...)
{
...
+ if (!strcmp(current->comm, "good"))
+ shrink_all_memory(ULONG_MAX);
+
ret = bio_iov_iter_get_pages(...);
+
+ if (!strcmp(current->comm, "bad"))
+ shrink_all_memory(ULONG_MAX);
...
}
@ shell
# NUM_PAGES=4
# PAGE_SIZE=$(getconf PAGE_SIZE)
# yes | dd of=test.img bs=${PAGE_SIZE} count=${NUM_PAGES}
# DEV=$(losetup -f --show test.img)
# gcc -DDEV=\"$DEV\" \
-DBUF_SIZE=$((PAGE_SIZE * NUM_PAGES)) \
-DPAGE_SIZE=${PAGE_SIZE} \
test.c -o test
# od -tx1 $DEV
0000000 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a 79 0a
*
0040000
# mv test good
# ./good
0x7f7c10418000: 0x79
0x7f7c10419000: 0x79
0x7f7c1041a000: 0x79
0x7f7c1041b000: 0x79
# mv good bad
# ./bad
0x7fa1b8050000: 0x0
0x7fa1b8051000: 0x0
0x7fa1b8052000: 0x0
0x7fa1b8053000: 0x0
Note: the issue is consistent on v5.17-rc3, but it's intermittent with the
support of MADV_FREE on v4.5 (60%-70% error; needs swap). [wrap
do_direct_IO() in do_blockdev_direct_IO() @ fs/direct-io.c].
- v5.17-rc3:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x0
# free | grep Swap
Swap: 0 0 0
- v4.5:
# for i in {1..1000}; do ./good; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
# mv good bad
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
2702 0x0
1298 0x79
# swapoff -av
swapoff /swap
# for i in {1..1000}; do ./bad; done \
| cut -d: -f2 | sort | uniq -c
4000 0x79
Ceph/TCMalloc:
=============
For documentation purposes, the use case driving the analysis/fix is Ceph
on Ubuntu 18.04, as the TCMalloc library there still uses MADV_FREE to
release unused memory to the system from the mmap'ed page heap (might be
committed back/used again; it's not munmap'ed):
- PageHeap::DecommitSpan() -> TCMalloc_SystemRelease() -> madvise()
- PageHeap::CommitSpan() -> TCMalloc_SystemCommit() -> do nothing
Note: TCMalloc switched back to MADV_DONTNEED a few commits after the
release in Ubuntu 18.04 (google-perftools/gperftools 2.5), so the issue
just 'disappeared' on Ceph on later Ubuntu releases but is still present
in the kernel, and can be hit by other use cases.
The observed issue seems to be the old Ceph bug #22464 [1], where checksum
mismatches are observed (and instrumentation with buffer dumps shows
zero-pages read from mmap'ed/MADV_FREE'd page ranges).
The issue in Ceph was reasonably deemed a kernel bug (comment #50) and
mostly worked around with a retry mechanism, but other parts of Ceph could
still hit that (rocksdb). Anyway, it's less likely to be hit again as
TCMalloc switched out of MADV_FREE by default.
(Some kernel versions/reports from the Ceph bug, and relation with
the MADV_FREE introduction/changes; TCMalloc versions not checked.)
- 4.4 good
- 4.5 (madv_free: introduction)
- 4.9 bad
- 4.10 good? maybe a swapless system
- 4.12 (madv_free: no longer free instantly on swapless systems)
- 4.13 bad
[1] https://tracker.ceph.com/issues/22464
Thanks:
======
Several people contributed to analysis/discussions/tests/reproducers in
the first stages when drilling down on ceph/tcmalloc/linux kernel:
- Dan Hill
- Dan Streetman
- Dongdong Tao
- Gavin Guo
- Gerald Yang
- Heitor Alves de Siqueira
- Ioanna Alifieraki
- Jay Vosburgh
- Matthew Ruffell
- Ponnuvel Palaniyappan
Reviews, suggestions, corrections, comments:
- Minchan Kim
- Yu Zhao
- Huang, Ying
- John Hubbard
- Christoph Hellwig
[mfo(a)canonical.com: v4]
Link: https://lkml.kernel.org/r/20220209202659.183418-1-mfo@canonical.com
Link: https://lkml.kernel.org/r/20220131230255.789059-1-mfo@canonical.com
Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages")
Signed-off-by: Mauricio Faria de Oliveira <mfo(a)canonical.com>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Dan Hill <daniel.hill(a)canonical.com>
Cc: Dan Streetman <dan.streetman(a)canonical.com>
Cc: Dongdong Tao <dongdong.tao(a)canonical.com>
Cc: Gavin Guo <gavin.guo(a)canonical.com>
Cc: Gerald Yang <gerald.yang(a)canonical.com>
Cc: Heitor Alves de Siqueira <halves(a)canonical.com>
Cc: Ioanna Alifieraki <ioanna-maria.alifieraki(a)canonical.com>
Cc: Jay Vosburgh <jay.vosburgh(a)canonical.com>
Cc: Matthew Ruffell <matthew.ruffell(a)canonical.com>
Cc: Ponnuvel Palaniyappan <ponnuvel.palaniyappan(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Cc: Christoph Hellwig <hch(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/rmap.c b/mm/rmap.c
index bfcc8e3d412f..5cb970d51f0a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1588,7 +1588,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/* MADV_FREE page check */
if (!folio_test_swapbacked(folio)) {
- if (!folio_test_dirty(folio)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = folio_ref_count(folio);
+ map_count = folio_mapcount(folio);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !folio_test_dirty(folio)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 734c15700cdf9062ae98d8b131c6fe873dfad26d Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador(a)suse.de>
Date: Tue, 22 Mar 2022 14:47:37 -0700
Subject: [PATCH] mm: only re-generate demotion targets when a numa node
changes its N_CPU state
Abhishek reported that after patch [1], hotplug operations are taking
roughly double the expected time. [2]
The reason behind this is that the CPU callbacks that
migrate_on_reclaim_init() sets always call set_migration_target_nodes()
whenever a CPU is brought up/down.
But we only care about numa nodes going from having CPUs to becoming
cpuless, and vice versa, as that influences the demotion_target order.
We do already have two CPU callbacks (vmstat_cpu_online() and
vmstat_cpu_dead()) that check exactly that, so get rid of the CPU
callbacks in migrate_on_reclaim_init() and only call
set_migration_target_nodes() from vmstat_cpu_{dead,online}() whenever a
numa node changes its N_CPU state.
[1] https://lore.kernel.org/linux-mm/20210721063926.3024591-2-ying.huang@intel.…
[2] https://lore.kernel.org/linux-mm/eb438ddd-2919-73d4-bd9f-b7eecdd9577a@linux…
[osalvador(a)suse.de: add feedback from Huang Ying]
Link: https://lkml.kernel.org/r/20220314150945.12694-1-osalvador@suse.de
Link: https://lkml.kernel.org/r/20220310120749.23077-1-osalvador@suse.de
Fixes: 884a6e5d1f93b ("mm/migrate: update node demotion order on hotplug events")
Signed-off-by: Oscar Salvador <osalvador(a)suse.de>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reported-by: Abhishek Goel <huntbag(a)linux.vnet.ibm.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Abhishek Goel <huntbag(a)linux.vnet.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index db96e10eb8da..90e75d5a54d6 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -48,7 +48,15 @@ int folio_migrate_mapping(struct address_space *mapping,
struct folio *newfolio, struct folio *folio, int extra_count);
extern bool numa_demotion_enabled;
+extern void migrate_on_reclaim_init(void);
+#ifdef CONFIG_HOTPLUG_CPU
+extern void set_migration_target_nodes(void);
#else
+static inline void set_migration_target_nodes(void) {}
+#endif
+#else
+
+static inline void set_migration_target_nodes(void) {}
static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t new,
diff --git a/mm/migrate.c b/mm/migrate.c
index 78b2cf87946d..bc9da3fd01aa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3209,7 +3209,7 @@ static void __set_migration_target_nodes(void)
/*
* For callers that do not hold get_online_mems() already.
*/
-static void set_migration_target_nodes(void)
+void set_migration_target_nodes(void)
{
get_online_mems();
__set_migration_target_nodes();
@@ -3273,51 +3273,24 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
return notifier_from_errno(0);
}
-/*
- * React to hotplug events that might affect the migration targets
- * like events that online or offline NUMA nodes.
- *
- * The ordering is also currently dependent on which nodes have
- * CPUs. That means we need CPU on/offline notification too.
- */
-static int migration_online_cpu(unsigned int cpu)
-{
- set_migration_target_nodes();
- return 0;
-}
-
-static int migration_offline_cpu(unsigned int cpu)
+void __init migrate_on_reclaim_init(void)
{
- set_migration_target_nodes();
- return 0;
-}
-
-static int __init migrate_on_reclaim_init(void)
-{
- int ret;
-
node_demotion = kmalloc_array(nr_node_ids,
sizeof(struct demotion_nodes),
GFP_KERNEL);
WARN_ON(!node_demotion);
- ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
- NULL, migration_offline_cpu);
+ hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
/*
- * In the unlikely case that this fails, the automatic
- * migration targets may become suboptimal for nodes
- * where N_CPU changes. With such a small impact in a
- * rare case, do not bother trying to do anything special.
+ * At this point, all numa nodes with memory/CPUs have their state
+ * properly set, so we can build the demotion order now.
+ * Let us hold the cpu_hotplug lock, as we could possibly have
+ * CPU hotplug events during boot.
*/
- WARN_ON(ret < 0);
- ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
- migration_online_cpu, NULL);
- WARN_ON(ret < 0);
-
- hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
- return 0;
+ cpus_read_lock();
+ set_migration_target_nodes();
+ cpus_read_unlock();
}
-late_initcall(migrate_on_reclaim_init);
#endif /* CONFIG_HOTPLUG_CPU */
bool numa_demotion_enabled = false;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d5cc8d739fac..b75b1a64b54c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -28,6 +28,7 @@
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>
+#include <linux/migrate.h>
#include "internal.h"
@@ -2049,7 +2050,12 @@ static void __init init_cpu_node_state(void)
static int vmstat_cpu_online(unsigned int cpu)
{
refresh_zone_stat_thresholds();
- node_set_state(cpu_to_node(cpu), N_CPU);
+
+ if (!node_state(cpu_to_node(cpu), N_CPU)) {
+ node_set_state(cpu_to_node(cpu), N_CPU);
+ set_migration_target_nodes();
+ }
+
return 0;
}
@@ -2072,6 +2078,8 @@ static int vmstat_cpu_dead(unsigned int cpu)
return 0;
node_clear_state(node, N_CPU);
+ set_migration_target_nodes();
+
return 0;
}
@@ -2103,6 +2111,9 @@ void __init init_mm_internals(void)
start_shepherd_timer();
#endif
+#if defined(CONFIG_MIGRATION) && defined(CONFIG_HOTPLUG_CPU)
+ migrate_on_reclaim_init();
+#endif
#ifdef CONFIG_PROC_FS
proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 734c15700cdf9062ae98d8b131c6fe873dfad26d Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador(a)suse.de>
Date: Tue, 22 Mar 2022 14:47:37 -0700
Subject: [PATCH] mm: only re-generate demotion targets when a numa node
changes its N_CPU state
Abhishek reported that after patch [1], hotplug operations are taking
roughly double the expected time. [2]
The reason behind this is that the CPU callbacks that
migrate_on_reclaim_init() sets always call set_migration_target_nodes()
whenever a CPU is brought up/down.
But we only care about numa nodes going from having CPUs to becoming
cpuless, and vice versa, as that influences the demotion_target order.
We do already have two CPU callbacks (vmstat_cpu_online() and
vmstat_cpu_dead()) that check exactly that, so get rid of the CPU
callbacks in migrate_on_reclaim_init() and only call
set_migration_target_nodes() from vmstat_cpu_{dead,online}() whenever a
numa node changes its N_CPU state.
[1] https://lore.kernel.org/linux-mm/20210721063926.3024591-2-ying.huang@intel.…
[2] https://lore.kernel.org/linux-mm/eb438ddd-2919-73d4-bd9f-b7eecdd9577a@linux…
[osalvador(a)suse.de: add feedback from Huang Ying]
Link: https://lkml.kernel.org/r/20220314150945.12694-1-osalvador@suse.de
Link: https://lkml.kernel.org/r/20220310120749.23077-1-osalvador@suse.de
Fixes: 884a6e5d1f93b ("mm/migrate: update node demotion order on hotplug events")
Signed-off-by: Oscar Salvador <osalvador(a)suse.de>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reported-by: Abhishek Goel <huntbag(a)linux.vnet.ibm.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Abhishek Goel <huntbag(a)linux.vnet.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index db96e10eb8da..90e75d5a54d6 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -48,7 +48,15 @@ int folio_migrate_mapping(struct address_space *mapping,
struct folio *newfolio, struct folio *folio, int extra_count);
extern bool numa_demotion_enabled;
+extern void migrate_on_reclaim_init(void);
+#ifdef CONFIG_HOTPLUG_CPU
+extern void set_migration_target_nodes(void);
#else
+static inline void set_migration_target_nodes(void) {}
+#endif
+#else
+
+static inline void set_migration_target_nodes(void) {}
static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t new,
diff --git a/mm/migrate.c b/mm/migrate.c
index 78b2cf87946d..bc9da3fd01aa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3209,7 +3209,7 @@ static void __set_migration_target_nodes(void)
/*
* For callers that do not hold get_online_mems() already.
*/
-static void set_migration_target_nodes(void)
+void set_migration_target_nodes(void)
{
get_online_mems();
__set_migration_target_nodes();
@@ -3273,51 +3273,24 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
return notifier_from_errno(0);
}
-/*
- * React to hotplug events that might affect the migration targets
- * like events that online or offline NUMA nodes.
- *
- * The ordering is also currently dependent on which nodes have
- * CPUs. That means we need CPU on/offline notification too.
- */
-static int migration_online_cpu(unsigned int cpu)
-{
- set_migration_target_nodes();
- return 0;
-}
-
-static int migration_offline_cpu(unsigned int cpu)
+void __init migrate_on_reclaim_init(void)
{
- set_migration_target_nodes();
- return 0;
-}
-
-static int __init migrate_on_reclaim_init(void)
-{
- int ret;
-
node_demotion = kmalloc_array(nr_node_ids,
sizeof(struct demotion_nodes),
GFP_KERNEL);
WARN_ON(!node_demotion);
- ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
- NULL, migration_offline_cpu);
+ hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
/*
- * In the unlikely case that this fails, the automatic
- * migration targets may become suboptimal for nodes
- * where N_CPU changes. With such a small impact in a
- * rare case, do not bother trying to do anything special.
+ * At this point, all numa nodes with memory/CPUs have their state
+ * properly set, so we can build the demotion order now.
+ * Let us hold the cpu_hotplug lock, as we could possibly have
+ * CPU hotplug events during boot.
*/
- WARN_ON(ret < 0);
- ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
- migration_online_cpu, NULL);
- WARN_ON(ret < 0);
-
- hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
- return 0;
+ cpus_read_lock();
+ set_migration_target_nodes();
+ cpus_read_unlock();
}
-late_initcall(migrate_on_reclaim_init);
#endif /* CONFIG_HOTPLUG_CPU */
bool numa_demotion_enabled = false;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d5cc8d739fac..b75b1a64b54c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -28,6 +28,7 @@
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>
+#include <linux/migrate.h>
#include "internal.h"
@@ -2049,7 +2050,12 @@ static void __init init_cpu_node_state(void)
static int vmstat_cpu_online(unsigned int cpu)
{
refresh_zone_stat_thresholds();
- node_set_state(cpu_to_node(cpu), N_CPU);
+
+ if (!node_state(cpu_to_node(cpu), N_CPU)) {
+ node_set_state(cpu_to_node(cpu), N_CPU);
+ set_migration_target_nodes();
+ }
+
return 0;
}
@@ -2072,6 +2078,8 @@ static int vmstat_cpu_dead(unsigned int cpu)
return 0;
node_clear_state(node, N_CPU);
+ set_migration_target_nodes();
+
return 0;
}
@@ -2103,6 +2111,9 @@ void __init init_mm_internals(void)
start_shepherd_timer();
#endif
+#if defined(CONFIG_MIGRATION) && defined(CONFIG_HOTPLUG_CPU)
+ migrate_on_reclaim_init();
+#endif
#ifdef CONFIG_PROC_FS
proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
Hi Greg,
Fedora 36 has switched from efifb to simpledrm as the pre-native-GPU-driver
fb provider, and the lack of this patch is causing issues for devices with
non-upright mounted LCD panels (1). Can you please add this to the 5.17
stable series?
Regards,
Hans
1) https://bugzilla.redhat.com/show_bug.cgi?id=2071134
Hans de Goede (1):
drm/simpledrm: Add "panel orientation" property on non-upright mounted
LCD panels
drivers/gpu/drm/tiny/simpledrm.c | 3 +++
1 file changed, 3 insertions(+)
--
2.35.1
I'm announcing the release of the 4.14.275 kernel.
All users of the 4.14 kernel series must upgrade.
The updated 4.14.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.14.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/arm64/silicon-errata.txt | 1
Makefile | 2
arch/arm/include/asm/kvm_host.h | 6
arch/arm64/Kconfig | 24 ++
arch/arm64/include/asm/assembler.h | 34 ++
arch/arm64/include/asm/cpu.h | 1
arch/arm64/include/asm/cpucaps.h | 4
arch/arm64/include/asm/cpufeature.h | 39 +++
arch/arm64/include/asm/cputype.h | 20 +
arch/arm64/include/asm/fixmap.h | 6
arch/arm64/include/asm/kvm_host.h | 5
arch/arm64/include/asm/kvm_mmu.h | 2
arch/arm64/include/asm/mmu.h | 8
arch/arm64/include/asm/sections.h | 6
arch/arm64/include/asm/sysreg.h | 5
arch/arm64/include/asm/vectors.h | 74 ++++++
arch/arm64/kernel/bpi.S | 55 ++++
arch/arm64/kernel/cpu_errata.c | 395 ++++++++++++++++++++++++++++++++-
arch/arm64/kernel/cpufeature.c | 21 +
arch/arm64/kernel/cpuinfo.c | 1
arch/arm64/kernel/entry.S | 196 +++++++++++++---
arch/arm64/kernel/vmlinux.lds.S | 2
arch/arm64/kvm/hyp/hyp-entry.S | 4
arch/arm64/kvm/hyp/switch.c | 9
arch/arm64/mm/mmu.c | 11
drivers/clocksource/arm_arch_timer.c | 15 +
include/linux/arm-smccc.h | 7
virt/kvm/arm/psci.c | 12 +
28 files changed, 908 insertions(+), 57 deletions(-)
Anshuman Khandual (1):
arm64: Add Cortex-X2 CPU part definition
Arnd Bergmann (1):
arm64: arch_timer: avoid unused function warning
Greg Kroah-Hartman (1):
Linux 4.14.275
James Morse (19):
arm64: entry.S: Add ventry overflow sanity checks
arm64: entry: Make the trampoline cleanup optional
arm64: entry: Free up another register on kpti's tramp_exit path
arm64: entry: Move the trampoline data page before the text page
arm64: entry: Allow tramp_alias to access symbols after the 4K boundary
arm64: entry: Don't assume tramp_vectors is the start of the vectors
arm64: entry: Move trampoline macros out of ifdef'd section
arm64: entry: Make the kpti trampoline's kpti sequence optional
arm64: entry: Allow the trampoline text to occupy multiple pages
arm64: entry: Add non-kpti __bp_harden_el1_vectors for mitigations
arm64: entry: Add vectors that have the bhb mitigation sequences
arm64: entry: Add macro for reading symbol addresses from the trampoline
arm64: Add percpu vectors for EL1
arm64: proton-pack: Report Spectre-BHB vulnerabilities as part of Spectre-v2
KVM: arm64: Add templates for BHB mitigation sequences
arm64: Mitigate spectre style branch history side channels
KVM: arm64: Allow SMCCC_ARCH_WORKAROUND_3 to be discovered and migrated
arm64: add ID_AA64ISAR2_EL1 sys register
arm64: Use the clearbhb instruction in mitigations
Marc Zyngier (4):
arm64: arch_timer: Add workaround for ARM erratum 1188873
arm64: Add silicon-errata.txt entry for ARM erratum 1188873
arm64: Make ARM64_ERRATUM_1188873 depend on COMPAT
arm64: Add part number for Neoverse N1
Rob Herring (1):
arm64: Add part number for Arm Cortex-A77
Suzuki K Poulose (1):
arm64: Add Neoverse-N2, Cortex-A710 CPU part definition
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ba89d2af17aa879dda30f5d5d3f152e587fc551 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 23 Mar 2022 09:32:35 -0600
Subject: [PATCH] io_uring: ensure recv and recvmsg handle MSG_WAITALL
correctly
We currently don't attempt to get the full asked-for length, even if
MSG_WAITALL is set, if we get a partial receive. If we do see a partial
receive, then just note how many bytes we did get and return -EAGAIN to
get it retried.
The iov is advanced appropriately for the vector-based case, and we
manually bump the buffer and remainder for the non-vector case.
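For context, a sketch of the equivalent bookkeeping in plain socket code
(an illustration of the retry logic only, not io_uring code; the fd is
assumed to be a connected stream socket):

#include <errno.h>
#include <sys/socket.h>
#include <sys/types.h>

/* What MSG_WAITALL saves callers from doing by hand; the patch makes
 * io_uring track the same "bytes done so far" (sr->done_io) when a
 * recv completes short. */
static ssize_t recv_all(int fd, char *buf, size_t len)
{
	size_t done = 0;

	while (done < len) {
		ssize_t n = recv(fd, buf + done, len - done, 0);

		if (n == 0)			/* EOF: return what we got */
			break;
		if (n < 0) {
			if (errno == EINTR)	/* interrupted: retry */
				continue;
			return done ? (ssize_t)done : -1;
		}
		done += (size_t)n;		/* partial receive: keep going */
	}
	return (ssize_t)done;
}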
Cc: stable(a)vger.kernel.org
Reported-by: Constantine Gavrilov <constantine.gavrilov(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f41d91ce1fd0..a70de170aea1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -612,6 +612,7 @@ struct io_sr_msg {
int msg_flags;
int bgid;
size_t len;
+ size_t done_io;
};
struct io_open {
@@ -5417,12 +5418,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
+ sr->done_io = 0;
return 0;
}
+static bool io_net_retry(struct socket *sock, int flags)
+{
+ if (!(flags & MSG_WAITALL))
+ return false;
+ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr iomsg, *kmsg;
+ struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
struct io_buffer *kbuf;
unsigned flags;
@@ -5465,6 +5475,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ return io_setup_async_msg(req, kmsg);
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
req_set_fail(req);
@@ -5474,6 +5488,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
@@ -5524,12 +5542,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+ return -EAGAIN;
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
req_set_fail(req);
}
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ba89d2af17aa879dda30f5d5d3f152e587fc551 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 23 Mar 2022 09:32:35 -0600
Subject: [PATCH] io_uring: ensure recv and recvmsg handle MSG_WAITALL
correctly
We currently don't attempt to get the full asked-for length, even if
MSG_WAITALL is set, if we get a partial receive. If we do see a partial
receive, then just note how many bytes we did get and return -EAGAIN to
get it retried.
The iov is advanced appropriately for the vector-based case, and we
manually bump the buffer and remainder for the non-vector case.
Cc: stable(a)vger.kernel.org
Reported-by: Constantine Gavrilov <constantine.gavrilov(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f41d91ce1fd0..a70de170aea1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -612,6 +612,7 @@ struct io_sr_msg {
int msg_flags;
int bgid;
size_t len;
+ size_t done_io;
};
struct io_open {
@@ -5417,12 +5418,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
+ sr->done_io = 0;
return 0;
}
+static bool io_net_retry(struct socket *sock, int flags)
+{
+ if (!(flags & MSG_WAITALL))
+ return false;
+ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr iomsg, *kmsg;
+ struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
struct io_buffer *kbuf;
unsigned flags;
@@ -5465,6 +5475,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ return io_setup_async_msg(req, kmsg);
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
req_set_fail(req);
@@ -5474,6 +5488,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
@@ -5524,12 +5542,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+ return -EAGAIN;
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
req_set_fail(req);
}
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ba89d2af17aa879dda30f5d5d3f152e587fc551 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 23 Mar 2022 09:32:35 -0600
Subject: [PATCH] io_uring: ensure recv and recvmsg handle MSG_WAITALL
correctly
We currently don't attempt to get the full asked-for length, even if
MSG_WAITALL is set, if we get a partial receive. If we do see a partial
receive, then just note how many bytes we did get and return -EAGAIN to
get it retried.
The iov is advanced appropriately for the vector-based case, and we
manually bump the buffer and remainder for the non-vector case.
Cc: stable(a)vger.kernel.org
Reported-by: Constantine Gavrilov <constantine.gavrilov(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f41d91ce1fd0..a70de170aea1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -612,6 +612,7 @@ struct io_sr_msg {
int msg_flags;
int bgid;
size_t len;
+ size_t done_io;
};
struct io_open {
@@ -5417,12 +5418,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
+ sr->done_io = 0;
return 0;
}
+static bool io_net_retry(struct socket *sock, int flags)
+{
+ if (!(flags & MSG_WAITALL))
+ return false;
+ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr iomsg, *kmsg;
+ struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
struct io_buffer *kbuf;
unsigned flags;
@@ -5465,6 +5475,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ return io_setup_async_msg(req, kmsg);
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
req_set_fail(req);
@@ -5474,6 +5488,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
@@ -5524,12 +5542,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+ return -EAGAIN;
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
req_set_fail(req);
}
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ba89d2af17aa879dda30f5d5d3f152e587fc551 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 23 Mar 2022 09:32:35 -0600
Subject: [PATCH] io_uring: ensure recv and recvmsg handle MSG_WAITALL
correctly
We currently don't attempt to get the full asked-for length, even if
MSG_WAITALL is set, if we get a partial receive. If we do see a partial
receive, then just note how many bytes we did get and return -EAGAIN to
get it retried.
The iov is advanced appropriately for the vector-based case, and we
manually bump the buffer and remainder for the non-vector case.
Cc: stable(a)vger.kernel.org
Reported-by: Constantine Gavrilov <constantine.gavrilov(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f41d91ce1fd0..a70de170aea1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -612,6 +612,7 @@ struct io_sr_msg {
int msg_flags;
int bgid;
size_t len;
+ size_t done_io;
};
struct io_open {
@@ -5417,12 +5418,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
+ sr->done_io = 0;
return 0;
}
+static bool io_net_retry(struct socket *sock, int flags)
+{
+ if (!(flags & MSG_WAITALL))
+ return false;
+ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr iomsg, *kmsg;
+ struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
struct io_buffer *kbuf;
unsigned flags;
@@ -5465,6 +5475,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ return io_setup_async_msg(req, kmsg);
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
req_set_fail(req);
@@ -5474,6 +5488,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
@@ -5524,12 +5542,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+ return -EAGAIN;
+ }
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
req_set_fail(req);
}
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
From: Rik van Riel <riel(a)surriel.com>
Subject: mm,hwpoison: unmap poisoned page before invalidation
In some cases it appears the invalidation of a hwpoisoned page fails
because the page is still mapped in another process. This can cause a
program to be continuously restarted and die when it page faults on the
page that was not invalidated. Avoid that problem by unmapping the
hwpoisoned page when we find it.
Another issue is that sometimes we end up oopsing in finish_fault, if the
code tries to do something with the now-NULL vmf->page. I did not hit
this error when submitting the previous patch because there are several
opportunities for alloc_set_pte to bail out before accessing vmf->page,
and that apparently happened on those systems, and most of the time on
other systems, too.
However, across several million systems that error does occur a handful of
times a day. It can be avoided by returning VM_FAULT_NOPAGE which will
cause do_read_fault to return before calling finish_fault.
Link: https://lkml.kernel.org/r/20220325161428.5068d97e@imladris.surriel.com
Fixes: e53ac7374e64 ("mm: invalidate hwpoison page cache page in fault path")
Signed-off-by: Rik van Riel <riel(a)surriel.com>
Reviewed-by: Miaohe Lin <linmiaohe(a)huawei.com>
Tested-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
--- a/mm/memory.c~mmhwpoison-unmap-poisoned-page-before-invalidation
+++ a/mm/memory.c
@@ -3918,14 +3918,18 @@ static vm_fault_t __do_fault(struct vm_f
return ret;
if (unlikely(PageHWPoison(vmf->page))) {
+ struct page *page = vmf->page;
vm_fault_t poisonret = VM_FAULT_HWPOISON;
if (ret & VM_FAULT_LOCKED) {
+ if (page_mapped(page))
+ unmap_mapping_pages(page_mapping(page),
+ page->index, 1, false);
/* Retry if a clean page was removed from the cache. */
- if (invalidate_inode_page(vmf->page))
- poisonret = 0;
- unlock_page(vmf->page);
+ if (invalidate_inode_page(page))
+ poisonret = VM_FAULT_NOPAGE;
+ unlock_page(page);
}
- put_page(vmf->page);
+ put_page(page);
vmf->page = NULL;
return poisonret;
}
_
From: Charan Teja Kalla <quic_charante(a)quicinc.com>
Subject: Revert "mm: madvise: skip unmapped vma holes passed to process_madvise"
This reverts commit 08095d6310a7 ("mm: madvise: skip unmapped vma holes
passed to process_madvise") as process_madvise() fails to return the exact
processed bytes in other cases too. As an example: if process_madvise()
hits mlocked pages after processing some initial bytes passed in [start,
end), it just returns EINVAL although some bytes were processed. Thus,
making an exception only for ENOMEM only partially fixes the problem of
returning the proper advised bytes.
Thus, revert this patch and return the proper bytes advised.
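For illustration, a minimal userspace sketch of the semantics being
restored (assumes a kernel/glibc recent enough to define
SYS_process_madvise and MADV_PAGEOUT; pidfd and iov setup are elsewhere):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>

/* With this revert, a mid-way failure yields the bytes actually
 * advised so far; -1 with errno set means nothing was processed. */
static long advise_remote(int pidfd, const struct iovec *iov, size_t iovcnt)
{
	long ret = syscall(SYS_process_madvise, pidfd, iov, iovcnt,
			   MADV_PAGEOUT, 0);

	if (ret < 0)
		perror("process_madvise");
	return ret;	/* caller may compare against the total requested */
}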
Link: https://lkml.kernel.org/r/e73da1304a88b6a8a11907045117cccf4c2b8374.16480466…
Fixes: 08095d6310a7ce ("mm: madvise: skip unmapped vma holes passed to process_madvise")
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/madvise.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
--- a/mm/madvise.c~revert-mm-madvise-skip-unmapped-vma-holes-passed-to-process_madvise
+++ a/mm/madvise.c
@@ -1464,16 +1464,9 @@ SYSCALL_DEFINE5(process_madvise, int, pi
while (iov_iter_count(&iter)) {
iovec = iov_iter_iovec(&iter);
- /*
- * do_madvise returns ENOMEM if unmapped holes are present
- * in the passed VMA. process_madvise() is expected to skip
- * unmapped holes passed to it in the 'struct iovec' list
- * and not fail because of them. Thus treat -ENOMEM return
- * from do_madvise as valid and continue processing.
- */
ret = do_madvise(mm, (unsigned long)iovec.iov_base,
iovec.iov_len, behavior);
- if (ret < 0 && ret != -ENOMEM)
+ if (ret < 0)
break;
iov_iter_advance(&iter, iovec.iov_len);
}
_
This is the start of the stable review cycle for the 4.14.275 release.
There are 27 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 03 Apr 2022 06:36:16 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.275-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.14.275-rc1
James Morse <james.morse(a)arm.com>
arm64: Use the clearbhb instruction in mitigations
James Morse <james.morse(a)arm.com>
arm64: add ID_AA64ISAR2_EL1 sys register
James Morse <james.morse(a)arm.com>
KVM: arm64: Allow SMCCC_ARCH_WORKAROUND_3 to be discovered and migrated
James Morse <james.morse(a)arm.com>
arm64: Mitigate spectre style branch history side channels
James Morse <james.morse(a)arm.com>
KVM: arm64: Add templates for BHB mitigation sequences
James Morse <james.morse(a)arm.com>
arm64: proton-pack: Report Spectre-BHB vulnerabilities as part of Spectre-v2
James Morse <james.morse(a)arm.com>
arm64: Add percpu vectors for EL1
James Morse <james.morse(a)arm.com>
arm64: entry: Add macro for reading symbol addresses from the trampoline
James Morse <james.morse(a)arm.com>
arm64: entry: Add vectors that have the bhb mitigation sequences
James Morse <james.morse(a)arm.com>
arm64: entry: Add non-kpti __bp_harden_el1_vectors for mitigations
James Morse <james.morse(a)arm.com>
arm64: entry: Allow the trampoline text to occupy multiple pages
James Morse <james.morse(a)arm.com>
arm64: entry: Make the kpti trampoline's kpti sequence optional
James Morse <james.morse(a)arm.com>
arm64: entry: Move trampoline macros out of ifdef'd section
James Morse <james.morse(a)arm.com>
arm64: entry: Don't assume tramp_vectors is the start of the vectors
James Morse <james.morse(a)arm.com>
arm64: entry: Allow tramp_alias to access symbols after the 4K boundary
James Morse <james.morse(a)arm.com>
arm64: entry: Move the trampoline data page before the text page
James Morse <james.morse(a)arm.com>
arm64: entry: Free up another register on kpti's tramp_exit path
James Morse <james.morse(a)arm.com>
arm64: entry: Make the trampoline cleanup optional
James Morse <james.morse(a)arm.com>
arm64: entry.S: Add ventry overflow sanity checks
Anshuman Khandual <anshuman.khandual(a)arm.com>
arm64: Add Cortex-X2 CPU part definition
Suzuki K Poulose <suzuki.poulose(a)arm.com>
arm64: Add Neoverse-N2, Cortex-A710 CPU part definition
Rob Herring <robh(a)kernel.org>
arm64: Add part number for Arm Cortex-A77
Marc Zyngier <marc.zyngier(a)arm.com>
arm64: Add part number for Neoverse N1
Marc Zyngier <marc.zyngier(a)arm.com>
arm64: Make ARM64_ERRATUM_1188873 depend on COMPAT
Marc Zyngier <marc.zyngier(a)arm.com>
arm64: Add silicon-errata.txt entry for ARM erratum 1188873
Arnd Bergmann <arnd(a)arndb.de>
arm64: arch_timer: avoid unused function warning
Marc Zyngier <marc.zyngier(a)arm.com>
arm64: arch_timer: Add workaround for ARM erratum 1188873
-------------
Diffstat:
Documentation/arm64/silicon-errata.txt | 1 +
Makefile | 4 +-
arch/arm/include/asm/kvm_host.h | 6 +
arch/arm64/Kconfig | 24 ++
arch/arm64/include/asm/assembler.h | 34 +++
arch/arm64/include/asm/cpu.h | 1 +
arch/arm64/include/asm/cpucaps.h | 4 +-
arch/arm64/include/asm/cpufeature.h | 39 ++++
arch/arm64/include/asm/cputype.h | 20 ++
arch/arm64/include/asm/fixmap.h | 6 +-
arch/arm64/include/asm/kvm_host.h | 5 +
arch/arm64/include/asm/kvm_mmu.h | 2 +-
arch/arm64/include/asm/mmu.h | 8 +-
arch/arm64/include/asm/sections.h | 6 +
arch/arm64/include/asm/sysreg.h | 5 +
arch/arm64/include/asm/vectors.h | 74 ++++++
arch/arm64/kernel/bpi.S | 55 +++++
arch/arm64/kernel/cpu_errata.c | 395 ++++++++++++++++++++++++++++++++-
arch/arm64/kernel/cpufeature.c | 21 ++
arch/arm64/kernel/cpuinfo.c | 1 +
arch/arm64/kernel/entry.S | 196 ++++++++++++----
arch/arm64/kernel/vmlinux.lds.S | 2 +-
arch/arm64/kvm/hyp/hyp-entry.S | 4 +
arch/arm64/kvm/hyp/switch.c | 9 +-
arch/arm64/mm/mmu.c | 11 +-
drivers/clocksource/arm_arch_timer.c | 15 ++
include/linux/arm-smccc.h | 7 +
virt/kvm/arm/psci.c | 12 +
28 files changed, 909 insertions(+), 58 deletions(-)
From: Rik van Riel <riel(a)surriel.com>
Subject: mm,hwpoison: unmap poisoned page before invalidation
In some cases it appears the invalidation of a hwpoisoned page fails
because the page is still mapped in another process. This can cause a
program to be continuously restarted and die when it page faults on the
page that was not invalidated. Avoid that problem by unmapping the
hwpoisoned page when we find it.
Another issue is that sometimes we end up oopsing in finish_fault, if the
code tries to do something with the now-NULL vmf->page. I did not hit
this error when submitting the previous patch because there are several
opportunities for alloc_set_pte to bail out before accessing vmf->page,
and that apparently happened on those systems, and most of the time on
other systems, too.
However, across several million systems that error does occur a handful of
times a day. It can be avoided by returning VM_FAULT_NOPAGE which will
cause do_read_fault to return before calling finish_fault.
Link: https://lkml.kernel.org/r/20220325161428.5068d97e@imladris.surriel.com
Fixes: e53ac7374e64 ("mm: invalidate hwpoison page cache page in fault path")
Signed-off-by: Rik van Riel <riel(a)surriel.com>
Reviewed-by: Miaohe Lin <linmiaohe(a)huawei.com>
Tested-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
--- a/mm/memory.c~mmhwpoison-unmap-poisoned-page-before-invalidation
+++ a/mm/memory.c
@@ -3918,14 +3918,18 @@ static vm_fault_t __do_fault(struct vm_f
return ret;
if (unlikely(PageHWPoison(vmf->page))) {
+ struct page *page = vmf->page;
vm_fault_t poisonret = VM_FAULT_HWPOISON;
if (ret & VM_FAULT_LOCKED) {
+ if (page_mapped(page))
+ unmap_mapping_pages(page_mapping(page),
+ page->index, 1, false);
/* Retry if a clean page was removed from the cache. */
- if (invalidate_inode_page(vmf->page))
- poisonret = 0;
- unlock_page(vmf->page);
+ if (invalidate_inode_page(page))
+ poisonret = VM_FAULT_NOPAGE;
+ unlock_page(page);
}
- put_page(vmf->page);
+ put_page(page);
vmf->page = NULL;
return poisonret;
}
_
From: Charan Teja Kalla <quic_charante(a)quicinc.com>
Subject: Revert "mm: madvise: skip unmapped vma holes passed to process_madvise"
This reverts commit 08095d6310a7 ("mm: madvise: skip unmapped vma holes
passed to process_madvise") as process_madvise() fails to return the exact
processed bytes in other cases too. As an example: if process_madvise()
hits mlocked pages after processing some initial bytes passed in [start,
end), it just returns EINVAL although some bytes were processed. Thus,
making an exception only for ENOMEM only partially fixes the problem of
returning the proper advised bytes.
Thus, revert this patch and return the proper bytes advised.
Link: https://lkml.kernel.org/r/e73da1304a88b6a8a11907045117cccf4c2b8374.16480466…
Fixes: 08095d6310a7ce ("mm: madvise: skip unmapped vma holes passed to process_madvise")
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/madvise.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
--- a/mm/madvise.c~revert-mm-madvise-skip-unmapped-vma-holes-passed-to-process_madvise
+++ a/mm/madvise.c
@@ -1464,16 +1464,9 @@ SYSCALL_DEFINE5(process_madvise, int, pi
while (iov_iter_count(&iter)) {
iovec = iov_iter_iovec(&iter);
- /*
- * do_madvise returns ENOMEM if unmapped holes are present
- * in the passed VMA. process_madvise() is expected to skip
- * unmapped holes passed to it in the 'struct iovec' list
- * and not fail because of them. Thus treat -ENOMEM return
- * from do_madvise as valid and continue processing.
- */
ret = do_madvise(mm, (unsigned long)iovec.iov_base,
iovec.iov_len, behavior);
- if (ret < 0 && ret != -ENOMEM)
+ if (ret < 0)
break;
iov_iter_advance(&iter, iovec.iov_len);
}
_
From version 2.38, binutils defaults to ISA spec version 20191213. This
means that the CSR read/write (csrr*/csrw*) instructions and the fence.i
instruction have been separated from the `I` extension and become two
standalone extensions: Zicsr and Zifencei. As the kernel uses those
instructions, this causes the following build failure:
CC arch/riscv/kernel/vdso/vgettimeofday.o
<<BUILDDIR>>/arch/riscv/include/asm/vdso/gettimeofday.h: Assembler messages:
<<BUILDDIR>>/arch/riscv/include/asm/vdso/gettimeofday.h:71: Error: unrecognized opcode `csrr a5,0xc01'
<<BUILDDIR>>/arch/riscv/include/asm/vdso/gettimeofday.h:71: Error: unrecognized opcode `csrr a5,0xc01'
<<BUILDDIR>>/arch/riscv/include/asm/vdso/gettimeofday.h:71: Error: unrecognized opcode `csrr a5,0xc01'
<<BUILDDIR>>/arch/riscv/include/asm/vdso/gettimeofday.h:71: Error: unrecognized opcode `csrr a5,0xc01'
The fix is to specify those extensions explicitly in -march. However, as
older binutils versions do not support this, we first need to detect that
support.
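For reference, an illustrative sketch (not taken from the kernel) of the
kind of construct that now requires Zicsr, matching the csrr failures in
the log above:
	/* Read the time CSR (0xc01); with binutils >= 2.38 this
	 * assembles only when Zicsr is present in -march. */
	static inline unsigned long rdtime(void)
	{
		unsigned long val;
		asm volatile("csrr %0, time" : "=r" (val));
		return val;
	}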
Cc: stable(a)vger.kernel.org # 4.15+
Cc: Kito Cheng <kito.cheng(a)gmail.com>
Signed-off-by: Aurelien Jarno <aurelien(a)aurel32.net>
---
arch/riscv/Makefile | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 8a107ed18b0d..7d81102cffd4 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -50,6 +50,12 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
+
+# Newer binutils versions default to ISA spec version 20191213 which moves some
+# instructions from the I extension to the Zicsr and Zifencei extensions.
+toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei)
+riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
+
KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
KBUILD_AFLAGS += -march=$(riscv-march-y)
--
2.34.1
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3fd07aecb75003fbcb0b7c3124d12f71ffd360d8 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal(a)opensource.wdc.com>
Date: Tue, 1 Mar 2022 20:30:09 +0900
Subject: [PATCH] scsi: scsi_debug: Fix qc_lock use in sdebug_blk_mq_poll()
The use of the 'locked' boolean variable to control locking and unlocking
of the qc_lock spinlock of struct sdebug_queue confuses sparse, leading to
a warning about an unexpected unlock. Simplify the qc_lock lock/unlock
handling code of this function to avoid this warning by removing the
'locked' boolean variable. This change also fixes unlocked access to the
in_use_bm bitmap with the find_first_bit() function.
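The resulting shape of the function is the usual lock/drop/retake
pattern; an illustrative sketch with hypothetical types and helpers, not
the driver code:
	/* Hold the lock while scanning, drop it around the callback,
	 * retake it before touching shared state such as the bitmap. */
	static int drain_queue(struct my_queue *q)
	{
		struct my_entry *e;
		unsigned long flags;
		int done = 0;
		spin_lock_irqsave(&q->lock, flags);
		while ((e = pick_entry_locked(q)) != NULL) {
			spin_unlock_irqrestore(&q->lock, flags);
			complete_entry(e);	/* callback runs unlocked */
			done++;
			spin_lock_irqsave(&q->lock, flags);
		}
		spin_unlock_irqrestore(&q->lock, flags);
		return done;
	}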
Link: https://lore.kernel.org/r/20220301113009.595857-3-damien.lemoal@opensource.…
Fixes: b05d4e481eff ("scsi: scsi_debug: Refine sdebug_blk_mq_poll()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lemoal(a)opensource.wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index f4e97f2224b2..25fa8e93f5a8 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -7509,7 +7509,6 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
{
bool first;
bool retiring = false;
- bool locked = false;
int num_entries = 0;
unsigned int qc_idx = 0;
unsigned long iflags;
@@ -7525,11 +7524,9 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
if (qc_idx >= sdebug_max_queue)
return 0;
+ spin_lock_irqsave(&sqp->qc_lock, iflags);
+
for (first = true; first || qc_idx + 1 < sdebug_max_queue; ) {
- if (!locked) {
- spin_lock_irqsave(&sqp->qc_lock, iflags);
- locked = true;
- }
if (first) {
first = false;
if (!test_bit(qc_idx, sqp->in_use_bm))
@@ -7586,14 +7583,15 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
}
WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE);
spin_unlock_irqrestore(&sqp->qc_lock, iflags);
- locked = false;
scsi_done(scp); /* callback to mid level */
num_entries++;
+ spin_lock_irqsave(&sqp->qc_lock, iflags);
if (find_first_bit(sqp->in_use_bm, sdebug_max_queue) >= sdebug_max_queue)
- break; /* if no more then exit without retaking spinlock */
+ break;
}
- if (locked)
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+
+ spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+
if (num_entries > 0)
atomic_add(num_entries, &sdeb_mq_poll_count);
return num_entries;
From: Zekun Shen <bruceshenzk(a)gmail.com>
[ Upstream commit 564d4eceb97eaf381dd6ef6470b06377bb50c95a ]
The bug was found during fuzzing. The stacktrace locates it in
ath5k_eeprom_convert_pcal_info_5111.
When none of the curves is selected in the loop, idx can go
up to AR5K_EEPROM_N_PD_CURVES. That makes the following line take pd
out of bounds:
pd = &chinfo[pier].pd_curves[idx];
There are many OOB writes using pd later in the code, so I
added a sanity check for idx. Checks for other loops involving
AR5K_EEPROM_N_PD_CURVES are not needed as the loop index is not
used outside those loops.
The patch is NOT tested with a real device.
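The underlying pattern is a search loop whose index is used after the
loop; an illustrative sketch (hypothetical names, not the driver code):
	#define N_CURVES 4
	static int pick_curve(const int *selected)
	{
		int idx;
		for (idx = 0; idx < N_CURVES; idx++)
			if (selected[idx])
				break;
		/* Without this check, idx == N_CURVES indexes out of
		 * bounds when nothing matched. */
		if (idx == N_CURVES)
			return -1;
		return idx;
	}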
The following is the fuzzing report
BUG: KASAN: slab-out-of-bounds in ath5k_eeprom_read_pcal_info_5111+0x126a/0x1390 [ath5k]
Write of size 1 at addr ffff8880174a4d60 by task modprobe/214
CPU: 0 PID: 214 Comm: modprobe Not tainted 5.6.0 #1
Call Trace:
dump_stack+0x76/0xa0
print_address_description.constprop.0+0x16/0x200
? ath5k_eeprom_read_pcal_info_5111+0x126a/0x1390 [ath5k]
? ath5k_eeprom_read_pcal_info_5111+0x126a/0x1390 [ath5k]
__kasan_report.cold+0x37/0x7c
? ath5k_eeprom_read_pcal_info_5111+0x126a/0x1390 [ath5k]
kasan_report+0xe/0x20
ath5k_eeprom_read_pcal_info_5111+0x126a/0x1390 [ath5k]
? apic_timer_interrupt+0xa/0x20
? ath5k_eeprom_init_11a_pcal_freq+0xbc0/0xbc0 [ath5k]
? ath5k_pci_eeprom_read+0x228/0x3c0 [ath5k]
ath5k_eeprom_init+0x2513/0x6290 [ath5k]
? ath5k_eeprom_init_11a_pcal_freq+0xbc0/0xbc0 [ath5k]
? usleep_range+0xb8/0x100
? apic_timer_interrupt+0xa/0x20
? ath5k_eeprom_read_pcal_info_2413+0x2f20/0x2f20 [ath5k]
ath5k_hw_init+0xb60/0x1970 [ath5k]
ath5k_init_ah+0x6fe/0x2530 [ath5k]
? kasprintf+0xa6/0xe0
? ath5k_stop+0x140/0x140 [ath5k]
? _dev_notice+0xf6/0xf6
? apic_timer_interrupt+0xa/0x20
ath5k_pci_probe.cold+0x29a/0x3d6 [ath5k]
? ath5k_pci_eeprom_read+0x3c0/0x3c0 [ath5k]
? mutex_lock+0x89/0xd0
? ath5k_pci_eeprom_read+0x3c0/0x3c0 [ath5k]
local_pci_probe+0xd3/0x160
pci_device_probe+0x23f/0x3e0
? pci_device_remove+0x280/0x280
? pci_device_remove+0x280/0x280
really_probe+0x209/0x5d0
Reported-by: Brendan Dolan-Gavitt <brendandg(a)nyu.edu>
Signed-off-by: Zekun Shen <bruceshenzk(a)gmail.com>
Signed-off-by: Kalle Valo <quic_kvalo(a)quicinc.com>
Link: https://lore.kernel.org/r/YckvDdj3mtCkDRIt@a-10-27-26-18.dynapool.vpn.nyu.e…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/wireless/ath/ath5k/eeprom.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c
index 94d34ee02265..01163b333945 100644
--- a/drivers/net/wireless/ath/ath5k/eeprom.c
+++ b/drivers/net/wireless/ath/ath5k/eeprom.c
@@ -746,6 +746,9 @@ ath5k_eeprom_convert_pcal_info_5111(struct ath5k_hw *ah, int mode,
}
}
+ if (idx == AR5K_EEPROM_N_PD_CURVES)
+ goto err_out;
+
ee->ee_pd_gains[mode] = 1;
pd = &chinfo[pier].pd_curves[idx];
--
2.34.1
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2e8e79c416aae1de224c0f1860f2e3350fa171f8 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl(a)pengutronix.de>
Date: Thu, 17 Mar 2022 08:57:35 +0100
Subject: [PATCH] can: m_can: m_can_tx_handler(): fix use after free of skb
can_put_echo_skb() will clone the skb and then free it. Move the
can_put_echo_skb() call for m_can version 3.0.x to directly before the
start of the xmit in hardware, similar to the 3.1.x branch.
Fixes: 80646733f11c ("can: m_can: update to support CAN FD features")
Link: https://lore.kernel.org/all/20220317081305.739554-1-mkl@pengutronix.de
Cc: stable(a)vger.kernel.org
Reported-by: Hangyu Hua <hbh25y(a)gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 1a4b56f6fa8c..b3b5bc1c803b 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1637,8 +1637,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
if (err)
goto out_fail;
- can_put_echo_skb(skb, dev, 0, 0);
-
if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) {
cccr = m_can_read(cdev, M_CAN_CCCR);
cccr &= ~CCCR_CMR_MASK;
@@ -1655,6 +1653,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
m_can_write(cdev, M_CAN_CCCR, cccr);
}
m_can_write(cdev, M_CAN_TXBTIE, 0x1);
+
+ can_put_echo_skb(skb, dev, 0, 0);
+
m_can_write(cdev, M_CAN_TXBAR, 0x1);
/* End of xmit function for version 3.0.x */
} else {
Hi Greg,
Sorry, please don't apply the patch to stable kernels. I tried to add
"linux,rs485-enabled-at-boot-time" support for this driver yesterday and
found that this patch is not correct. I will send a patch to revert it
from the mainline kernel.
Thanks,
Hui.
On 4/1/22 18:38, gregkh(a)linuxfoundation.org wrote:
> This is a note to let you know that I've just added the patch titled
>
> serial: sc16is7xx: Clear RS485 bits in the shutdown
>
> to the 4.9-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> serial-sc16is7xx-clear-rs485-bits-in-the-shutdown.patch
> and it can be found in the queue-4.9 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
> >From 927728a34f11b5a27f4610bdb7068317d6fdc72a Mon Sep 17 00:00:00 2001
> From: Hui Wang <hui.wang(a)canonical.com>
> Date: Tue, 8 Mar 2022 19:00:42 +0800
> Subject: serial: sc16is7xx: Clear RS485 bits in the shutdown
>
> From: Hui Wang <hui.wang(a)canonical.com>
>
> commit 927728a34f11b5a27f4610bdb7068317d6fdc72a upstream.
>
> We tested RS485 function on an EVB which has SC16IS752, after
> finishing the test, we started the RS232 function test, but found the
> RTS is still working in the RS485 mode.
>
> That is because both startup and shutdown call port_update() to set
> the EFCR_REG, this will not clear the RS485 bits once the bits are set
> in the reconf_rs485(). To fix it, clear the RS485 bits in shutdown.
>
> Cc: <stable(a)vger.kernel.org>
> Signed-off-by: Hui Wang <hui.wang(a)canonical.com>
> Link: https://lore.kernel.org/r/20220308110042.108451-1-hui.wang@canonical.com
> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> ---
> drivers/tty/serial/sc16is7xx.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> --- a/drivers/tty/serial/sc16is7xx.c
> +++ b/drivers/tty/serial/sc16is7xx.c
> @@ -1055,10 +1055,12 @@ static void sc16is7xx_shutdown(struct ua
>
> /* Disable all interrupts */
> sc16is7xx_port_write(port, SC16IS7XX_IER_REG, 0);
> - /* Disable TX/RX */
> + /* Disable TX/RX, clear auto RS485 and RTS invert */
> sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG,
> SC16IS7XX_EFCR_RXDISABLE_BIT |
> - SC16IS7XX_EFCR_TXDISABLE_BIT,
> + SC16IS7XX_EFCR_TXDISABLE_BIT |
> + SC16IS7XX_EFCR_AUTO_RS485_BIT |
> + SC16IS7XX_EFCR_RTS_INVERT_BIT,
> SC16IS7XX_EFCR_RXDISABLE_BIT |
> SC16IS7XX_EFCR_TXDISABLE_BIT);
>
>
>
> Patches currently in stable-queue which might be from hui.wang(a)canonical.com are
>
> queue-4.9/serial-sc16is7xx-clear-rs485-bits-in-the-shutdown.patch
Commit ed8cc3b1fc84 ("PCI: qcom: Add support for SDM845 PCIe
controller") introduced a clock imbalance by enabling the pipe clock
both in init() and in post_init() but only disabling it in post_deinit().
Note that the pipe clock was also never disabled in the init() error
paths and that enabling the clock before powering up the PHY looks
questionable.
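The underlying rule, as an illustrative sketch (hypothetical helpers,
not the driver code): every clk_prepare_enable() must be matched by
exactly one clk_disable_unprepare(), including on error paths:
	#include <linux/clk.h>
	static int pipe_clk_on(struct clk *pipe_clk)
	{
		int ret;
		ret = clk_prepare_enable(pipe_clk);	/* enable once */
		if (ret)
			return ret;
		/* ... power up the PHY after the clock is running ... */
		return 0;
	}
	static void pipe_clk_off(struct clk *pipe_clk)
	{
		/* ... power down the PHY ... */
		clk_disable_unprepare(pipe_clk);	/* the one matching disable */
	}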
Fixes: ed8cc3b1fc84 ("PCI: qcom: Add support for SDM845 PCIe controller")
Cc: stable(a)vger.kernel.org # 5.6
Cc: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
Resending with lists on CC.
Johan
drivers/pci/controller/dwc/pcie-qcom.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index b79d98e5e228..20a0e6533a1c 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1238,12 +1238,6 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
goto err_disable_clocks;
}
- ret = clk_prepare_enable(res->pipe_clk);
- if (ret) {
- dev_err(dev, "cannot prepare/enable pipe clock\n");
- goto err_disable_clocks;
- }
-
/* Wait for reset to complete, required on SM8450 */
usleep_range(1000, 1500);
--
2.35.1
On Wed, Mar 30, 2022 at 04:54:53PM -0400, Joshua Freedman wrote:
> This felt really anti-climactic haha, but hopefully it's useful?
> cat bisect.log
> Bisecting: 81 revisions left to test after this (roughly 6 steps)
> [770aac3c84e0c83a19985413fa9fbfc126cc0ff6] net: mdio-ipq4019: add delay
> after clock enable
Wait, you still have more steps to go here. Did you test this kernel?
If so, you need to continue using 'git bisect good' and 'git bisect bad'
to find the offending commit. This looks just like the first step?
thanks,
greg k-h
On Wed, Mar 30, 2022 at 04:54:53PM -0400, Joshua Freedman wrote:
> This felt really anti-climactic haha, but hopefully it's useful?
> cat bisect.log
> Bisecting: 81 revisions left to test after this (roughly 6 steps)
> [770aac3c84e0c83a19985413fa9fbfc126cc0ff6] net: mdio-ipq4019: add delay
> after clock enable
Ok, great! Can you start a new thread and add the developers of that
change to to it to help track down the issue?
thanks,
greg k-h
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8cba323437a49a45756d661f500b324fc2d486fe Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean(a)geanix.com>
Date: Tue, 8 Feb 2022 09:52:13 +0100
Subject: [PATCH] mtd: rawnand: protect access to rawnand devices while in
suspend
Prevent rawnand access while in a suspended state.
Commit 013e6292aaf5 ("mtd: rawnand: Simplify the locking") allows the
rawnand layer to return errors rather than waiting in a blocking wait.
Tested on an iMX6ULL.
Fixes: 013e6292aaf5 ("mtd: rawnand: Simplify the locking")
Signed-off-by: Sean Nyekjaer <sean(a)geanix.com>
Reviewed-by: Boris Brezillon <boris.brezillon(a)collabora.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20220208085213.1838273-1-sean@geanix.com
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 3e4a525ac3ca..612ae60e9763 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -338,16 +338,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
*
* Return: -EBUSY if the chip has been suspended, 0 otherwise
*/
-static int nand_get_device(struct nand_chip *chip)
+static void nand_get_device(struct nand_chip *chip)
{
- mutex_lock(&chip->lock);
- if (chip->suspended) {
+ /* Wait until the device is resumed. */
+ while (1) {
+ mutex_lock(&chip->lock);
+ if (!chip->suspended) {
+ mutex_lock(&chip->controller->lock);
+ return;
+ }
mutex_unlock(&chip->lock);
- return -EBUSY;
- }
- mutex_lock(&chip->controller->lock);
- return 0;
+ wait_event(chip->resume_wq, !chip->suspended);
+ }
}
/**
@@ -576,9 +579,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs)
nand_erase_nand(chip, &einfo, 0);
/* Write bad block marker to OOB */
- ret = nand_get_device(chip);
- if (ret)
- return ret;
+ nand_get_device(chip);
ret = nand_markbad_bbm(chip, ofs);
nand_release_device(chip);
@@ -3826,9 +3827,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
ops->mode != MTD_OPS_RAW)
return -ENOTSUPP;
- ret = nand_get_device(chip);
- if (ret)
- return ret;
+ nand_get_device(chip);
if (!ops->datbuf)
ret = nand_do_read_oob(chip, from, ops);
@@ -4415,13 +4414,11 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
struct mtd_oob_ops *ops)
{
struct nand_chip *chip = mtd_to_nand(mtd);
- int ret;
+ int ret = 0;
ops->retlen = 0;
- ret = nand_get_device(chip);
- if (ret)
- return ret;
+ nand_get_device(chip);
switch (ops->mode) {
case MTD_OPS_PLACE_OOB:
@@ -4481,9 +4478,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
return -EIO;
/* Grab the lock and see if the device is available */
- ret = nand_get_device(chip);
- if (ret)
- return ret;
+ nand_get_device(chip);
/* Shift to get first page */
page = (int)(instr->addr >> chip->page_shift);
@@ -4570,7 +4565,7 @@ static void nand_sync(struct mtd_info *mtd)
pr_debug("%s: called\n", __func__);
/* Grab the lock and see if the device is available */
- WARN_ON(nand_get_device(chip));
+ nand_get_device(chip);
/* Release it and go back */
nand_release_device(chip);
}
@@ -4587,9 +4582,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
int ret;
/* Select the NAND device */
- ret = nand_get_device(chip);
- if (ret)
- return ret;
+ nand_get_device(chip);
nand_select_target(chip, chipnr);
@@ -4660,6 +4653,8 @@ static void nand_resume(struct mtd_info *mtd)
__func__);
}
mutex_unlock(&chip->lock);
+
+ wake_up_all(&chip->resume_wq);
}
/**
@@ -5438,6 +5433,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
chip->cur_cs = -1;
mutex_init(&chip->lock);
+ init_waitqueue_head(&chip->resume_wq);
/* Enforce the right timings for reset/detection */
chip->current_interface_config = nand_get_reset_interface_config();
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 5b88cd51fadb..dcf90144d70b 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1240,6 +1240,7 @@ struct nand_secure_region {
* @lock: Lock protecting the suspended field. Also used to serialize accesses
* to the NAND device
* @suspended: Set to 1 when the device is suspended, 0 when it's not
+ * @resume_wq: wait queue to sleep if rawnand is in suspended state.
* @cur_cs: Currently selected target. -1 means no target selected, otherwise we
* should always have cur_cs >= 0 && cur_cs < nanddev_ntargets().
* NAND Controller drivers should not modify this value, but they're
@@ -1294,6 +1295,7 @@ struct nand_chip {
/* Internals */
struct mutex lock;
unsigned int suspended : 1;
+ wait_queue_head_t resume_wq;
int cur_cs;
int read_retries;
struct nand_secure_region *secure_regions;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 151c6b49d679872d6fc0b50e0ad96303091694a2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Date: Mon, 28 Feb 2022 18:33:34 +0200
Subject: [PATCH] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set
Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes
to the flash. It is not recommended to send unsupported opcodes to
flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE
is specified. With this, users will not be able to issue erase opcodes to
flashes and will instead receive an -ENOTSUPP error.
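For context, a sketch of the caller-side effect (illustrative only,
assuming the mtd core rejects operations whose handler is unset):
	/* Hypothetical sketch: with mtd->_erase left unset for
	 * SPI_NOR_NO_ERASE flashes, erase requests fail up front. */
	static int mtd_erase_sketch(struct mtd_info *mtd,
				    struct erase_info *instr)
	{
		if (!mtd->_erase)
			return -ENOTSUPP;
		return mtd->_erase(mtd, instr);
	}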
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR")
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Reviewed-by: Michael Walle <michael(a)walle.cc>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.c…
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 9014008e60b3..b4f141ad9c9c 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2948,10 +2948,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
mtd->flags = MTD_CAP_NORFLASH;
if (nor->info->flags & SPI_NOR_NO_ERASE)
mtd->flags |= MTD_NO_ERASE;
+ else
+ mtd->_erase = spi_nor_erase;
mtd->writesize = nor->params->writesize;
mtd->writebufsize = nor->params->page_size;
mtd->size = nor->params->size;
- mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
/* Might be already set by some SST flashes. */
if (!mtd->_write)
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 151c6b49d679872d6fc0b50e0ad96303091694a2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Date: Mon, 28 Feb 2022 18:33:34 +0200
Subject: [PATCH] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set
Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes
to the flash. It is not recommended to send unsupported opcodes to
flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE
is specified. With this, users will not be able to issue erase opcodes to
flashes and will instead receive an -ENOTSUPP error.
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR")
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Reviewed-by: Michael Walle <michael(a)walle.cc>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.c…
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 9014008e60b3..b4f141ad9c9c 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2948,10 +2948,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
mtd->flags = MTD_CAP_NORFLASH;
if (nor->info->flags & SPI_NOR_NO_ERASE)
mtd->flags |= MTD_NO_ERASE;
+ else
+ mtd->_erase = spi_nor_erase;
mtd->writesize = nor->params->writesize;
mtd->writebufsize = nor->params->page_size;
mtd->size = nor->params->size;
- mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
/* Might be already set by some SST flashes. */
if (!mtd->_write)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 151c6b49d679872d6fc0b50e0ad96303091694a2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Date: Mon, 28 Feb 2022 18:33:34 +0200
Subject: [PATCH] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set
Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes
to the flash. It is not recommended to send unsupported opcodes to
flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE
is specified. With this, users will not be able to issue erase opcodes to
flashes and will instead receive an -ENOTSUPP error.
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR")
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Reviewed-by: Michael Walle <michael(a)walle.cc>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.c…
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 9014008e60b3..b4f141ad9c9c 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2948,10 +2948,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
mtd->flags = MTD_CAP_NORFLASH;
if (nor->info->flags & SPI_NOR_NO_ERASE)
mtd->flags |= MTD_NO_ERASE;
+ else
+ mtd->_erase = spi_nor_erase;
mtd->writesize = nor->params->writesize;
mtd->writebufsize = nor->params->page_size;
mtd->size = nor->params->size;
- mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
/* Might be already set by some SST flashes. */
if (!mtd->_write)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 151c6b49d679872d6fc0b50e0ad96303091694a2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Date: Mon, 28 Feb 2022 18:33:34 +0200
Subject: [PATCH] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set
Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes
to the flash. It is not recommended to send unsupported opcodes to
flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE
is specified. With this, users will not be able to issue erase opcodes to
flashes and will instead receive an -ENOTSUPP error.
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR")
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Reviewed-by: Michael Walle <michael(a)walle.cc>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.c…
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 9014008e60b3..b4f141ad9c9c 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2948,10 +2948,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
mtd->flags = MTD_CAP_NORFLASH;
if (nor->info->flags & SPI_NOR_NO_ERASE)
mtd->flags |= MTD_NO_ERASE;
+ else
+ mtd->_erase = spi_nor_erase;
mtd->writesize = nor->params->writesize;
mtd->writebufsize = nor->params->page_size;
mtd->size = nor->params->size;
- mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
/* Might be already set by some SST flashes. */
if (!mtd->_write)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 151c6b49d679872d6fc0b50e0ad96303091694a2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Date: Mon, 28 Feb 2022 18:33:34 +0200
Subject: [PATCH] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set
Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes
to the flash. It is not recommended to send unsupported opcodes to
flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE
is specified. With this, users will not be able to issue erase opcodes to
flashes and will instead receive an -ENOTSUPP error.
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR")
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Reviewed-by: Michael Walle <michael(a)walle.cc>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.c…
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 9014008e60b3..b4f141ad9c9c 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2948,10 +2948,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
mtd->flags = MTD_CAP_NORFLASH;
if (nor->info->flags & SPI_NOR_NO_ERASE)
mtd->flags |= MTD_NO_ERASE;
+ else
+ mtd->_erase = spi_nor_erase;
mtd->writesize = nor->params->writesize;
mtd->writebufsize = nor->params->page_size;
mtd->size = nor->params->size;
- mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
/* Might be already set by some SST flashes. */
if (!mtd->_write)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 151c6b49d679872d6fc0b50e0ad96303091694a2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Date: Mon, 28 Feb 2022 18:33:34 +0200
Subject: [PATCH] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set
Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes
to the flash. It is not recommended to send unsupported opcodes to
flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE
is specified. With this, users will not be able to issue erase opcodes to
flashes and will instead receive an -ENOTSUPP error.
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR")
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Reviewed-by: Michael Walle <michael(a)walle.cc>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.c…
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 9014008e60b3..b4f141ad9c9c 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2948,10 +2948,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
mtd->flags = MTD_CAP_NORFLASH;
if (nor->info->flags & SPI_NOR_NO_ERASE)
mtd->flags |= MTD_NO_ERASE;
+ else
+ mtd->_erase = spi_nor_erase;
mtd->writesize = nor->params->writesize;
mtd->writebufsize = nor->params->page_size;
mtd->size = nor->params->size;
- mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
/* Might be already set by some SST flashes. */
if (!mtd->_write)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 151c6b49d679872d6fc0b50e0ad96303091694a2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Date: Mon, 28 Feb 2022 18:33:34 +0200
Subject: [PATCH] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set
Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes
to the flash. It is not recommended to send unsupported opcodes to
flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE
is specified. With this, users will not be able to issue erase opcodes to
flashes and will instead receive an -ENOTSUPP error.
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR")
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)microchip.com>
Reviewed-by: Michael Walle <michael(a)walle.cc>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.c…
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 9014008e60b3..b4f141ad9c9c 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2948,10 +2948,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
mtd->flags = MTD_CAP_NORFLASH;
if (nor->info->flags & SPI_NOR_NO_ERASE)
mtd->flags |= MTD_NO_ERASE;
+ else
+ mtd->_erase = spi_nor_erase;
mtd->writesize = nor->params->writesize;
mtd->writebufsize = nor->params->page_size;
mtd->size = nor->params->size;
- mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
/* Might be already set by some SST flashes. */
if (!mtd->_write)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 50ebd19e3585b9792e994cfa8cbee8947fe06371 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk(a)kernel.org>
Date: Tue, 11 Jan 2022 21:13:59 +0100
Subject: [PATCH] pinctrl: samsung: drop pin banks references on error paths
The driver iterates over its devicetree children with
for_each_child_of_node() and stores for later found node pointer. This
has to be put in error paths to avoid leak during re-probing.
Fixes: ab663789d697 ("pinctrl: samsung: Match pin banks with their device nodes")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
Reviewed-by: Sam Protsenko <semen.protsenko(a)linaro.org>
Reviewed-by: Chanho Park <chanho61.park(a)samsung.com>
Link: https://lore.kernel.org/r/20220111201426.326777-2-krzysztof.kozlowski@canon…
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c
index 0f6e9305fec5..c4175fea7d74 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
@@ -1002,6 +1002,16 @@ samsung_pinctrl_get_soc_data_for_of_alias(struct platform_device *pdev)
return &(of_data->ctrl[id]);
}
+static void samsung_banks_of_node_put(struct samsung_pinctrl_drv_data *d)
+{
+ struct samsung_pin_bank *bank;
+ unsigned int i;
+
+ bank = d->pin_banks;
+ for (i = 0; i < d->nr_banks; ++i, ++bank)
+ of_node_put(bank->of_node);
+}
+
/* retrieve the soc specific data */
static const struct samsung_pin_ctrl *
samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d,
@@ -1117,19 +1127,19 @@ static int samsung_pinctrl_probe(struct platform_device *pdev)
if (ctrl->retention_data) {
drvdata->retention_ctrl = ctrl->retention_data->init(drvdata,
ctrl->retention_data);
- if (IS_ERR(drvdata->retention_ctrl))
- return PTR_ERR(drvdata->retention_ctrl);
+ if (IS_ERR(drvdata->retention_ctrl)) {
+ ret = PTR_ERR(drvdata->retention_ctrl);
+ goto err_put_banks;
+ }
}
ret = samsung_pinctrl_register(pdev, drvdata);
if (ret)
- return ret;
+ goto err_put_banks;
ret = samsung_gpiolib_register(pdev, drvdata);
- if (ret) {
- samsung_pinctrl_unregister(pdev, drvdata);
- return ret;
- }
+ if (ret)
+ goto err_unregister;
if (ctrl->eint_gpio_init)
ctrl->eint_gpio_init(drvdata);
@@ -1139,6 +1149,12 @@ static int samsung_pinctrl_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, drvdata);
return 0;
+
+err_unregister:
+ samsung_pinctrl_unregister(pdev, drvdata);
+err_put_banks:
+ samsung_banks_of_node_put(drvdata);
+ return ret;
}
/*
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b622ffe1d9ecbac71f0cddb52ff0831efdf8fb83 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust(a)hammerspace.com>
Date: Tue, 22 Feb 2022 18:20:38 -0500
Subject: [PATCH] NFS: NFSv2/v3 clients should never be setting NFS_CAP_XATTR
Ensure that we always initialise the 'xattr_support' field in struct
nfs_fsinfo, so that nfs_server_set_fsinfo() doesn't declare our NFSv2/v3
client to be capable of supporting the NFSv4.2 xattr protocol by setting
the NFS_CAP_XATTR capability.
This configuration can cause nfs_do_access() to set access mode bits
that are unsupported by the NFSv3 ACCESS call, which may confuse
spec-compliant servers.
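The capability wiring this protects, as an illustrative sketch of the
consumer side (not the exact fs/nfs code):
	/* A garbage, nonzero xattr_support here would spuriously
	 * enable the NFSv4.2 xattr capability for NFSv2/v3. */
	static void set_xattr_cap(struct nfs_server *server,
				  const struct nfs_fsinfo *fsinfo)
	{
		if (fsinfo->xattr_support)
			server->caps |= NFS_CAP_XATTR;
		else
			server->caps &= ~NFS_CAP_XATTR;
	}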
Reported-by: Olga Kornievskaia <kolga(a)netapp.com>
Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 9274c9c5efea..54a1d21cbcc6 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2228,6 +2228,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
/* ignore properties */
result->lease_time = 0;
result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ result->xattr_support = 0;
return 0;
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 73dcaa99fa9b..e3570c656b0f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -92,6 +92,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ info->xattr_support = 0;
return 0;
}
Bios queued into the BFQ IO scheduler can be associated with a cgroup
that was already offlined. This may then cause insertion of this
bfq_group into a service tree. But this bfq_group will get freed as soon
as the last bio associated with it completes, leading to use-after-free
issues for service tree users. Fix the problem by making sure we always
operate on an online bfq_group. If the bfq_group associated with the bio
is not online, we pick the first online parent.
CC: stable(a)vger.kernel.org
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Tested-by: "yukuai (C)" <yukuai3(a)huawei.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-cgroup.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 32d2c2a47480..09574af83566 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -612,10 +612,19 @@ static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;
+ struct bfq_group *bfqg;
- if (!blkg)
- return bfqd->root_group;
- return blkg_to_bfqg(blkg);
+ while (blkg) {
+ bfqg = blkg_to_bfqg(blkg);
+ if (bfqg->online) {
+ bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
+ return bfqg;
+ }
+ blkg = blkg->parent;
+ }
+ bio_associate_blkg_from_css(bio,
+ &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
+ return bfqd->root_group;
}
/**
--
2.34.1
Track whether a bfq_group is still online. We cannot rely on
blkcg_gq->online because that gets cleared only after all policies are
offlined. We need something that is updated under bfqd->lock while we
are cleaning up our bfq_group, so that we can guarantee that when we see
an online bfq_group, it will stay online while we are holding
bfqd->lock.
CC: stable(a)vger.kernel.org
Tested-by: "yukuai (C)" <yukuai3(a)huawei.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-cgroup.c | 3 ++-
block/bfq-iosched.h | 2 ++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 9352f3cc2377..879380c2bc7e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
*/
bfqg->bfqd = bfqd;
bfqg->active_entities = 0;
+ bfqg->online = true;
bfqg->rq_pos_tree = RB_ROOT;
}
@@ -603,7 +604,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
struct bfq_entity *entity;
bfqg = bfq_lookup_bfqg(bfqd, blkcg);
-
if (unlikely(!bfqg))
return NULL;
@@ -979,6 +979,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
put_async_queues:
bfq_put_async_queues(bfqd, bfqg);
+ bfqg->online = false;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index a56763045d19..4664e2f3e828 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -928,6 +928,8 @@ struct bfq_group {
/* reference counter (see comments in bfq_bic_update_cgroup) */
int ref;
+ /* Is bfq_group still online? */
+ bool online;
struct bfq_entity entity;
struct bfq_sched_data sched_data;
--
2.34.1
In bfq_insert_request() we unlock bfqd->lock only to call
trace_block_rq_insert() and then lock bfqd->lock again. This is
pointless: tracing is disabled when we really care about performance,
and even when the tracepoint is enabled, it is a quick call.
CC: stable(a)vger.kernel.org
Tested-by: "yukuai (C)" <yukuai3(a)huawei.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-iosched.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 1fc4d4628fba..19082e14f3c1 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6150,11 +6150,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
return;
}
- spin_unlock_irq(&bfqd->lock);
-
trace_block_rq_insert(rq);
- spin_lock_irq(&bfqd->lock);
bfqq = bfq_init_rq(rq);
if (!bfqq || at_head) {
if (at_head)
--
2.34.1
When a process is migrated to a different cgroup (or, in the case of
writeback, just starts submitting bios associated with a different
cgroup), bfq_merge_bio() can operate with stale cgroup information in
the bic. Thus the bio can be merged into a request from a different
cgroup, or the merge can combine bfqqs of different cgroups or bfqqs of
already dead cgroups, causing possible use-after-free issues. Fix the
problem by updating the cgroup information in bfq_merge_bio().
CC: stable(a)vger.kernel.org
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Tested-by: "yukuai (C)" <yukuai3(a)huawei.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-iosched.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 89fe3f85eb3c..1fc4d4628fba 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2457,10 +2457,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
spin_lock_irq(&bfqd->lock);
- if (bic)
+ if (bic) {
+ /*
+ * Make sure cgroup info is uptodate for current process before
+ * considering the merge.
+ */
+ bfq_bic_update_cgroup(bic, bio);
+
bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
- else
+ } else {
bfqd->bio_bfqq = NULL;
+ }
bfqd->bio_bic = bic;
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
--
2.34.1
When a bfqq is shared by multiple processes, it can happen that one of
the processes gets moved to a different cgroup (or just starts
submitting IO for a different cgroup). When that happens we need to
split the merged bfqq, as otherwise we will have IO for multiple cgroups
in one bfqq and will account IO time to the wrong entities, etc.
Similarly, if the bfqq is scheduled to merge with another bfqq but the
merge has not happened yet, cancel the merge as it may no longer be
valid.
CC: stable(a)vger.kernel.org
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Tested-by: "yukuai (C)" <yukuai3(a)huawei.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-cgroup.c | 36 +++++++++++++++++++++++++++++++++---
block/bfq-iosched.c | 2 +-
block/bfq-iosched.h | 1 +
3 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 420eda2589c0..9352f3cc2377 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -743,9 +743,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
if (sync_bfqq) {
- entity = &sync_bfqq->entity;
- if (entity->sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ } else {
+ struct bfq_queue *bfqq;
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data !=
+ &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is
+ * not valid anymore. We cannot easily just
+ * cancel the merge (by clearing new_bfqq) as
+ * there may be other processes using this
+ * queue and holding refs to all queues below
+ * sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from
+ * bfqq now so that we cannot merge bio to a
+ * request from the old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, 1);
+ }
+ }
}
return bfqg;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7d00b21ebe5d..89fe3f85eb3c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5315,7 +5315,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
{
struct bfq_queue *__bfqq, *next;
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 3b83e3d1c2e5..a56763045d19 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -979,6 +979,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
--
2.34.1
It can happen that the parent of a bfqq changes between the moment we
decide two queues are worth merging (and set bic->stable_merge_bfqq)
and the moment bfq_setup_merge() is called. This can happen e.g. because
the process submitted IO for a different cgroup and thus the bfqq got
reparented. It can even happen that the bfqq we are merging with has a
parent cgroup that is already offline and going to be destroyed, in
which case the merge can lead to use-after-free issues such as:
BUG: KASAN: use-after-free in __bfq_deactivate_entity+0x9cb/0xa50
Read of size 8 at addr ffff88800693c0c0 by task runc:[2:INIT]/10544
CPU: 0 PID: 10544 Comm: runc:[2:INIT] Tainted: G E 5.15.2-0.g5fb85fd-default #1 openSUSE Tumbleweed (unreleased) f1f3b891c72369aebecd2e43e4641a6358867c70
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014
Call Trace:
<IRQ>
dump_stack_lvl+0x46/0x5a
print_address_description.constprop.0+0x1f/0x140
? __bfq_deactivate_entity+0x9cb/0xa50
kasan_report.cold+0x7f/0x11b
? __bfq_deactivate_entity+0x9cb/0xa50
__bfq_deactivate_entity+0x9cb/0xa50
? update_curr+0x32f/0x5d0
bfq_deactivate_entity+0xa0/0x1d0
bfq_del_bfqq_busy+0x28a/0x420
? resched_curr+0x116/0x1d0
? bfq_requeue_bfqq+0x70/0x70
? check_preempt_wakeup+0x52b/0xbc0
__bfq_bfqq_expire+0x1a2/0x270
bfq_bfqq_expire+0xd16/0x2160
? try_to_wake_up+0x4ee/0x1260
? bfq_end_wr_async_queues+0xe0/0xe0
? _raw_write_unlock_bh+0x60/0x60
? _raw_spin_lock_irq+0x81/0xe0
bfq_idle_slice_timer+0x109/0x280
? bfq_dispatch_request+0x4870/0x4870
__hrtimer_run_queues+0x37d/0x700
? enqueue_hrtimer+0x1b0/0x1b0
? kvm_clock_get_cycles+0xd/0x10
? ktime_get_update_offsets_now+0x6f/0x280
hrtimer_interrupt+0x2c8/0x740
Fix the problem by checking that the parent of the two bfqqs we are
merging in bfq_setup_merge() is the same.
Link: https://lore.kernel.org/linux-block/20211125172809.GC19572@quack2.suse.cz/
CC: stable(a)vger.kernel.org
Fixes: 430a67f9d616 ("block, bfq: merge bursts of newly-created queues")
Tested-by: "yukuai (C)" <yukuai3(a)huawei.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-iosched.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6d122c28086e..7d00b21ebe5d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2758,6 +2758,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
if (process_refs == 0 || new_process_refs == 0)
return NULL;
+ /*
+ * Make sure merged queues belong to the same parent. Parents could
+ * have changed since the time we decided the two queues are suitable
+ * for merging.
+ */
+ if (new_bfqq->entity.parent != bfqq->entity.parent)
+ return NULL;
+
bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
new_bfqq->pid);
--
2.34.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b850b7a8b369322adf699ef48ceff4d902525c8c Mon Sep 17 00:00:00 2001
From: Ang Tien Sung <tien.sung.ang(a)intel.com>
Date: Wed, 23 Feb 2022 08:41:46 -0600
Subject: [PATCH] firmware: stratix10-svc: add missing callback parameter on
RSU
Fix a bug whereby the return response in parameter a1 from an
SMC call is not properly set in the callback data during an
INTEL_SIP_SMC_RSU_ERROR command.
Link: https://lore.kernel.org/lkml/20220216081513.28319-1-tien.sung.ang@intel.com
Fixes: 6b50d882d38d ("firmware: add remote status update client support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ang Tien Sung <tien.sung.ang(a)intel.com>
Signed-off-by: Dinh Nguyen <dinguyen(a)kernel.org>
Link: https://lore.kernel.org/r/20220223144146.399263-1-dinguyen@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c
index 4bd57a908efe..8177a0fae11d 100644
--- a/drivers/firmware/stratix10-svc.c
+++ b/drivers/firmware/stratix10-svc.c
@@ -483,7 +483,7 @@ static int svc_normal_to_secure_thread(void *data)
case INTEL_SIP_SMC_RSU_ERROR:
pr_err("%s: STATUS_ERROR\n", __func__);
cbdata->status = BIT(SVC_STATUS_ERROR);
- cbdata->kaddr1 = NULL;
+ cbdata->kaddr1 = &res.a1;
cbdata->kaddr2 = NULL;
cbdata->kaddr3 = NULL;
pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 571426631acf46e2999c7ecd1e9d048172969a43 Mon Sep 17 00:00:00 2001
From: Billy Tsai <billy_tsai(a)aspeedtech.com>
Date: Mon, 21 Feb 2022 09:27:05 +0800
Subject: [PATCH] iio: adc: aspeed: Add divider flag to fix incorrect voltage
reading.
The formula for the ADC sampling period in ast2400/ast2500 is:
ADC clock period = PCLK * 2 * (ADC0C[31:17] + 1) * (ADC0C[9:0])
When ADC0C[9:0] is set to 0, the sampling voltage will be lower than
expected, because the hardware may not have enough time to
charge/discharge to a stable voltage. This patch uses the flag
CLK_DIVIDER_ONE_BASED, which uses the raw value read from the
register, with a value of zero considered invalid, to conform to the
corrected formula.
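As a worked example (illustrative numbers only): with ADC0C[31:17] = 0
and ADC0C[9:0] = 0, the formula above yields PCLK * 2 * 1 * 0 = 0, i.e.
a zero-length sampling period. With CLK_DIVIDER_ONE_BASED the raw
register value is used as the divider and 0 is rejected as invalid, so
the smallest valid period becomes PCLK * 2 * 1 * 1.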
Fixes: 573803234e72 ("iio: Aspeed ADC")
Reported-by: Konstantin Klubnichkin <kitsok(a)yandex-team.ru>
Signed-off-by: Billy Tsai <billy_tsai(a)aspeedtech.com>
Reviewed-by: Joel Stanley <joel(a)jms.id.au>
Link: https://lore.kernel.org/r/20220221012705.22008-1-billy_tsai@aspeedtech.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c
index e939b84cbb56..0793d2474cdc 100644
--- a/drivers/iio/adc/aspeed_adc.c
+++ b/drivers/iio/adc/aspeed_adc.c
@@ -539,7 +539,9 @@ static int aspeed_adc_probe(struct platform_device *pdev)
data->clk_scaler = devm_clk_hw_register_divider(
&pdev->dev, clk_name, clk_parent_name, scaler_flags,
data->base + ASPEED_REG_CLOCK_CONTROL, 0,
- data->model_data->scaler_bit_width, 0, &data->clk_lock);
+ data->model_data->scaler_bit_width,
+ data->model_data->need_prescaler ? CLK_DIVIDER_ONE_BASED : 0,
+ &data->clk_lock);
if (IS_ERR(data->clk_scaler))
return PTR_ERR(data->clk_scaler);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 571426631acf46e2999c7ecd1e9d048172969a43 Mon Sep 17 00:00:00 2001
From: Billy Tsai <billy_tsai(a)aspeedtech.com>
Date: Mon, 21 Feb 2022 09:27:05 +0800
Subject: [PATCH] iio: adc: aspeed: Add divider flag to fix incorrect voltage
reading.
The formula for the ADC sampling period in ast2400/ast2500 is:
ADC clock period = PCLK * 2 * (ADC0C[31:17] + 1) * (ADC0C[9:0])
When ADC0C[9:0] is set to 0, the sampling voltage will be lower than
expected, because the hardware may not have enough time to
charge/discharge to a stable voltage. This patch uses the flag
CLK_DIVIDER_ONE_BASED, which uses the raw value read from the
register, with a value of zero considered invalid, to conform to the
corrected formula.
Fixes: 573803234e72 ("iio: Aspeed ADC")
Reported-by: Konstantin Klubnichkin <kitsok(a)yandex-team.ru>
Signed-off-by: Billy Tsai <billy_tsai(a)aspeedtech.com>
Reviewed-by: Joel Stanley <joel(a)jms.id.au>
Link: https://lore.kernel.org/r/20220221012705.22008-1-billy_tsai@aspeedtech.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c
index e939b84cbb56..0793d2474cdc 100644
--- a/drivers/iio/adc/aspeed_adc.c
+++ b/drivers/iio/adc/aspeed_adc.c
@@ -539,7 +539,9 @@ static int aspeed_adc_probe(struct platform_device *pdev)
data->clk_scaler = devm_clk_hw_register_divider(
&pdev->dev, clk_name, clk_parent_name, scaler_flags,
data->base + ASPEED_REG_CLOCK_CONTROL, 0,
- data->model_data->scaler_bit_width, 0, &data->clk_lock);
+ data->model_data->scaler_bit_width,
+ data->model_data->need_prescaler ? CLK_DIVIDER_ONE_BASED : 0,
+ &data->clk_lock);
if (IS_ERR(data->clk_scaler))
return PTR_ERR(data->clk_scaler);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 571426631acf46e2999c7ecd1e9d048172969a43 Mon Sep 17 00:00:00 2001
From: Billy Tsai <billy_tsai(a)aspeedtech.com>
Date: Mon, 21 Feb 2022 09:27:05 +0800
Subject: [PATCH] iio: adc: aspeed: Add divider flag to fix incorrect voltage
reading.
The formula for the ADC sampling period in ast2400/ast2500 is:
ADC clock period = PCLK * 2 * (ADC0C[31:17] + 1) * (ADC0C[9:0])
When ADC0C[9:0] is set to 0, the sampling voltage will be lower than
expected, because the hardware may not have enough time to
charge/discharge to a stable voltage. This patch uses the flag
CLK_DIVIDER_ONE_BASED, which uses the raw value read from the
register, with a value of zero considered invalid, to conform to the
corrected formula.
Fixes: 573803234e72 ("iio: Aspeed ADC")
Reported-by: Konstantin Klubnichkin <kitsok(a)yandex-team.ru>
Signed-off-by: Billy Tsai <billy_tsai(a)aspeedtech.com>
Reviewed-by: Joel Stanley <joel(a)jms.id.au>
Link: https://lore.kernel.org/r/20220221012705.22008-1-billy_tsai@aspeedtech.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c
index e939b84cbb56..0793d2474cdc 100644
--- a/drivers/iio/adc/aspeed_adc.c
+++ b/drivers/iio/adc/aspeed_adc.c
@@ -539,7 +539,9 @@ static int aspeed_adc_probe(struct platform_device *pdev)
data->clk_scaler = devm_clk_hw_register_divider(
&pdev->dev, clk_name, clk_parent_name, scaler_flags,
data->base + ASPEED_REG_CLOCK_CONTROL, 0,
- data->model_data->scaler_bit_width, 0, &data->clk_lock);
+ data->model_data->scaler_bit_width,
+ data->model_data->need_prescaler ? CLK_DIVIDER_ONE_BASED : 0,
+ &data->clk_lock);
if (IS_ERR(data->clk_scaler))
return PTR_ERR(data->clk_scaler);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 571426631acf46e2999c7ecd1e9d048172969a43 Mon Sep 17 00:00:00 2001
From: Billy Tsai <billy_tsai(a)aspeedtech.com>
Date: Mon, 21 Feb 2022 09:27:05 +0800
Subject: [PATCH] iio: adc: aspeed: Add divider flag to fix incorrect voltage
reading.
The formula for the ADC sampling period in ast2400/ast2500 is:
ADC clock period = PCLK * 2 * (ADC0C[31:17] + 1) * (ADC0C[9:0])
When ADC0C[9:0] is set to 0 the sampling voltage will be lower than
expected, because the hardware may not have enough time to
charge/discharge to a stable voltage. This patch uses the
CLK_DIVIDER_ONE_BASED flag, which makes the divider use the raw value
read from the register and treats a value of zero as invalid, conforming
to the corrected formula.
Fixes: 573803234e72 ("iio: Aspeed ADC")
Reported-by: Konstantin Klubnichkin <kitsok(a)yandex-team.ru>
Signed-off-by: Billy Tsai <billy_tsai(a)aspeedtech.com>
Reviewed-by: Joel Stanley <joel(a)jms.id.au>
Link: https://lore.kernel.org/r/20220221012705.22008-1-billy_tsai@aspeedtech.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c
index e939b84cbb56..0793d2474cdc 100644
--- a/drivers/iio/adc/aspeed_adc.c
+++ b/drivers/iio/adc/aspeed_adc.c
@@ -539,7 +539,9 @@ static int aspeed_adc_probe(struct platform_device *pdev)
data->clk_scaler = devm_clk_hw_register_divider(
&pdev->dev, clk_name, clk_parent_name, scaler_flags,
data->base + ASPEED_REG_CLOCK_CONTROL, 0,
- data->model_data->scaler_bit_width, 0, &data->clk_lock);
+ data->model_data->scaler_bit_width,
+ data->model_data->need_prescaler ? CLK_DIVIDER_ONE_BASED : 0,
+ &data->clk_lock);
if (IS_ERR(data->clk_scaler))
return PTR_ERR(data->clk_scaler);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 571426631acf46e2999c7ecd1e9d048172969a43 Mon Sep 17 00:00:00 2001
From: Billy Tsai <billy_tsai(a)aspeedtech.com>
Date: Mon, 21 Feb 2022 09:27:05 +0800
Subject: [PATCH] iio: adc: aspeed: Add divider flag to fix incorrect voltage
reading.
The formula for the ADC sampling period in ast2400/ast2500 is:
ADC clock period = PCLK * 2 * (ADC0C[31:17] + 1) * (ADC0C[9:0])
When ADC0C[9:0] is set to 0 the sampling voltage will be lower than
expected, because the hardware may not have enough time to
charge/discharge to a stable voltage. This patch uses the
CLK_DIVIDER_ONE_BASED flag, which makes the divider use the raw value
read from the register and treats a value of zero as invalid, conforming
to the corrected formula.
Fixes: 573803234e72 ("iio: Aspeed ADC")
Reported-by: Konstantin Klubnichkin <kitsok(a)yandex-team.ru>
Signed-off-by: Billy Tsai <billy_tsai(a)aspeedtech.com>
Reviewed-by: Joel Stanley <joel(a)jms.id.au>
Link: https://lore.kernel.org/r/20220221012705.22008-1-billy_tsai@aspeedtech.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c
index e939b84cbb56..0793d2474cdc 100644
--- a/drivers/iio/adc/aspeed_adc.c
+++ b/drivers/iio/adc/aspeed_adc.c
@@ -539,7 +539,9 @@ static int aspeed_adc_probe(struct platform_device *pdev)
data->clk_scaler = devm_clk_hw_register_divider(
&pdev->dev, clk_name, clk_parent_name, scaler_flags,
data->base + ASPEED_REG_CLOCK_CONTROL, 0,
- data->model_data->scaler_bit_width, 0, &data->clk_lock);
+ data->model_data->scaler_bit_width,
+ data->model_data->need_prescaler ? CLK_DIVIDER_ONE_BASED : 0,
+ &data->clk_lock);
if (IS_ERR(data->clk_scaler))
return PTR_ERR(data->clk_scaler);
The component match callback function needs to check whether the
expected data is passed to it. Without this check, a NULL pointer
dereference can occur when another driver registers a component before
the i915 driver has its component master fully bound.
Fixes: 7b882fe3e3e8b ("ALSA: hda - handle multiple i915 device instances")
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Signed-off-by: Won Chung <wonchung(a)google.com>
---
- Add "Fixes" tag
- Send to stable(a)vger.kernel.org
sound/hda/hdac_i915.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index efe810af28c5..958b0975fa40 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -102,13 +102,13 @@ static int i915_component_master_match(struct device *dev, int subcomponent,
struct pci_dev *hdac_pci, *i915_pci;
struct hdac_bus *bus = data;
- if (!dev_is_pci(dev))
+ if (!dev_is_pci(dev) || !bus)
return 0;
hdac_pci = to_pci_dev(bus->dev);
i915_pci = to_pci_dev(dev);
- if (!strcmp(dev->driver->name, "i915") &&
+ if (dev->driver && !strcmp(dev->driver->name, "i915") &&
subcomponent == I915_COMPONENT_AUDIO &&
connectivity_check(i915_pci, hdac_pci))
return 1;
--
2.35.1.1021.g381101b075-goog
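For reference, the match callback reads as follows with both checks in
place; this is a sketch assembled from the hunk above, with
connectivity_check() and the surrounding types coming from
sound/hda/hdac_i915.c:

static int i915_component_master_match(struct device *dev, int subcomponent,
				       void *data)
{
	struct pci_dev *hdac_pci, *i915_pci;
	struct hdac_bus *bus = data;

	/* bail out if dev is not a PCI device or no bus data was passed in */
	if (!dev_is_pci(dev) || !bus)
		return 0;

	hdac_pci = to_pci_dev(bus->dev);
	i915_pci = to_pci_dev(dev);

	/* dev->driver may still be NULL while the device is not bound */
	if (dev->driver && !strcmp(dev->driver->name, "i915") &&
	    subcomponent == I915_COMPONENT_AUDIO &&
	    connectivity_check(i915_pci, hdac_pci))
		return 1;

	return 0;
}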
The patch titled
Subject: mm/mempolicy: fix mpol_new leak in shared_policy_replace
has been added to the -mm tree. Its filename is
mm-mempolicy-fix-mpol_new-leak-in-shared_policy_replace.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-mempolicy-fix-mpol_new-leak-in…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-mempolicy-fix-mpol_new-leak-in…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm/mempolicy: fix mpol_new leak in shared_policy_replace
If mpol_new is allocated but not used in the restart loop, it is freed
via mpol_put() before returning to the caller. But its refcnt is not
initialized yet, so mpol_put() cannot do the right thing and might leak
the unused mpol_new. This can happen if the mempolicy of the shared
shmem file is updated while sp->lock is dropped during the memory
allocation.
This issue can be triggered easily with the code snippet below if many
processes do the following work at the same time:
shmid = shmget((key_t)5566, 1024 * PAGE_SIZE, 0666|IPC_CREAT);
shm = shmat(shmid, 0, 0);
loop many times {
mbind(shm, 1024 * PAGE_SIZE, MPOL_LOCAL, mask, maxnode, 0);
mbind(shm + 128 * PAGE_SIZE, 128 * PAGE_SIZE, MPOL_DEFAULT, mask,
maxnode, 0);
}
Link: https://lkml.kernel.org/r/20220329111416.27954-1-linmiaohe@huawei.com
Fixes: 42288fe366c4 ("mm: mempolicy: Convert shared_policy mutex to spinlock")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: KOSAKI Motohiro <kosaki.motohiro(a)jp.fujitsu.com>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: <stable(a)vger.kernel.org> [3.8
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mempolicy.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/mempolicy.c~mm-mempolicy-fix-mpol_new-leak-in-shared_policy_replace
+++ a/mm/mempolicy.c
@@ -2733,6 +2733,7 @@ alloc_new:
mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
if (!mpol_new)
goto err_out;
+ atomic_set(&mpol_new->refcnt, 1);
goto restart;
}
_
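A self-contained version of the reproducer sketched in the commit
message follows; it is a best-effort sketch, assuming 4 KiB pages and
libnuma headers (build with cc repro.c -lnuma). Note that mbind(2)
expects an empty nodemask for MPOL_LOCAL and MPOL_DEFAULT, so NULL/0 is
passed instead of the mask/maxnode placeholders above. Each instance
loops forever, so run many in parallel to hit the race.

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <numaif.h>

#define PAGE_SIZE 4096UL	/* assumption: 4 KiB pages */

int main(void)
{
	int shmid = shmget((key_t)5566, 1024 * PAGE_SIZE, 0666 | IPC_CREAT);
	char *shm = shmat(shmid, 0, 0);

	if (shmid < 0 || shm == (void *)-1) {
		perror("shmget/shmat");
		return 1;
	}

	for (;;) {
		/* alternate policies on overlapping ranges of the segment */
		mbind(shm, 1024 * PAGE_SIZE, MPOL_LOCAL, NULL, 0, 0);
		mbind(shm + 128 * PAGE_SIZE, 128 * PAGE_SIZE, MPOL_DEFAULT,
		      NULL, 0, 0);
	}
	return 0;
}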
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
mm-mempolicy-fix-mpol_new-leak-in-shared_policy_replace.patch
mm-shmem-make-shmem_init-return-void.patch
mm-memcg-remove-unneeded-nr_scanned.patch
mm-mremap-use-helper-mlock_future_check.patch
mm-z3fold-declare-z3fold_mount-with-__init.patch
mm-z3fold-remove-obsolete-comment-in-z3fold_alloc.patch
mm-z3fold-minor-clean-up-for-z3fold_free.patch
mm-z3fold-remove-unneeded-page_mapcount_reset-and-clearpageprivate.patch
mm-z3fold-remove-confusing-local-variable-l-reassignment.patch
mm-z3fold-move-decrement-of-pool-pages_nr-into-__release_z3fold_page.patch
mm-z3fold-remove-redundant-list_del_init-of-zhdr-buddy-in-z3fold_free.patch
mm-z3fold-remove-unneeded-page_headless-check-in-free_handle.patch
mm-compaction-use-helper-isolation_suitable.patch
mm-migration-remove-unneeded-local-variable-mapping_locked.patch
mm-migration-remove-unneeded-out-label.patch
mm-migration-remove-unneeded-local-variable-page_lru.patch
mm-migration-fix-the-confusing-pagetranshuge-check.patch
mm-migration-use-helper-function-vma_lookup-in-add_page_for_migration.patch
mm-migration-use-helper-macro-min-in-do_pages_stat.patch
mm-migration-avoid-unneeded-nodemask_t-initialization.patch
mm-migration-remove-some-duplicated-codes-in-migrate_pages.patch
mm-migration-fix-potential-page-refcounts-leak-in-migrate_pages.patch
mm-migration-fix-potential-invalid-node-access-for-reclaim-based-migration.patch
mm-migration-fix-possible-do_pages_stat_array-racing-with-memory-offline.patch
From: David Stevens <stevensd(a)chromium.org>
Calculate the appropriate mask for non-size-aligned page selective
invalidation. Since psi uses the mask value to mask out the lower order
bits of the target address, properly flushing the iotlb requires using a
mask value such that [pfn, pfn+pages) all lie within the flushed
size-aligned region. This is not normally an issue because iova.c
always allocates iovas that are aligned to their size. However, iovas
which come from other sources (e.g. userspace via VFIO) may not be
aligned.
To properly flush the IOTLB, both the start and end pfns need to be
equal after applying the mask. That means that the most efficient mask
to use is the index of the lowest bit that is equal where all higher
bits are also equal. For example, if pfn=0x17f and pages=3, then
end_pfn=0x181, so the smallest mask we can use is 8. Any differences
above the highest bit of pages are due to carrying, so by xnor'ing pfn
and end_pfn and then masking out the lower order bits based on pages, we
get 0xffffff00, where the first set bit is the mask we want to use.
Fixes: 6fe1010d6d9c ("vfio/type1: DMA unmap chunking")
Cc: stable(a)vger.kernel.org
Signed-off-by: David Stevens <stevensd(a)chromium.org>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
---
The seeds of the bug were introduced by f76aec76ec7f6, which
simultaneously added the alignment requirement to the iommu driver and
made the iova allocator return aligned iovas. However, I don't think
there was any way to trigger the bug at that time. The tagged VFIO
change is one that actually introduced a code path that could trigger
the bug. There may also be other ways to trigger the bug that I am not
aware of.
v1 -> v2:
- Calculate an appropriate mask for non-size-aligned iovas instead
of falling back to domain selective flush.
v2 -> v3:
- Add more detail to commit message.
drivers/iommu/intel/iommu.c | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cfe9ed2..ab2273300346 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1717,7 +1717,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
unsigned long pfn, unsigned int pages,
int ih, int map)
{
- unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+ unsigned int aligned_pages = __roundup_pow_of_two(pages);
+ unsigned int mask = ilog2(aligned_pages);
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
u16 did = domain->iommu_did[iommu->seq_id];
@@ -1729,10 +1730,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
if (domain_use_first_level(domain)) {
domain_flush_piotlb(iommu, domain, addr, pages, ih);
} else {
+ unsigned long bitmask = aligned_pages - 1;
+
+ /*
+ * PSI masks the low order bits of the base address. If the
+ * address isn't aligned to the mask, then compute a mask value
+ * needed to ensure the target range is flushed.
+ */
+ if (unlikely(bitmask & pfn)) {
+ unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+ /*
+ * Since end_pfn <= pfn + bitmask, the only way bits
+ * higher than bitmask can differ in pfn and end_pfn is
+ * by carrying. This means after masking out bitmask,
+ * high bits starting with the first set bit in
+ * shared_bits are all equal in both pfn and end_pfn.
+ */
+ shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+ mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+ }
+
/*
* Fallback to domain selective flush if no PSI support or
- * the size is too big. PSI requires page size to be 2 ^ x,
- * and the base address is naturally aligned to the size.
+ * the size is too big.
*/
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
--
2.35.1.1094.g7c7d902a7c-goog
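The mask derivation in the commit message can be checked in isolation.
Below is a user-space sketch of the same computation, with the kernel's
__roundup_pow_of_two()/ilog2()/__ffs() replaced by compiler builtins; it
reproduces the pfn=0x17f, pages=3 example:

#include <stdio.h>

/* round up to the next power of two (stand-in for __roundup_pow_of_two) */
static unsigned long roundup_pow2(unsigned long n)
{
	return n == 1 ? 1 : 1UL << (64 - __builtin_clzl(n - 1));
}

static unsigned int psi_mask(unsigned long pfn, unsigned long pages)
{
	unsigned long aligned_pages = roundup_pow2(pages);
	unsigned long bitmask = aligned_pages - 1;
	unsigned int mask = __builtin_ctzl(aligned_pages); /* ilog2 of a power of two */

	if (bitmask & pfn) {
		unsigned long end_pfn = pfn + pages - 1;
		unsigned long shared_bits = ~(pfn ^ end_pfn) & ~bitmask;

		/* first set bit of shared_bits is the smallest usable mask */
		mask = shared_bits ? __builtin_ctzl(shared_bits) : 64;
	}
	return mask;
}

int main(void)
{
	printf("pfn=0x17f pages=3 -> mask=%u\n", psi_mask(0x17f, 3));	/* 8 */
	printf("pfn=0x180 pages=2 -> mask=%u\n", psi_mask(0x180, 2));	/* 1 */
	return 0;
}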