Currently info->severity is not assigned a value before aer_get_device_error_info() and aer_print_error() are called. Fix the bug by getting the severity from the port's AER status, mask and severity registers. At the same time, add code to clear the port's fatal errors.
Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling") Signed-off-by: Dongdong Liu liudongdong3@huawei.com Cc: stable@vger.kernel.org Cc: Keith Busch keith.busch@intel.com Cc: Bjorn Helgaas bhelgaas@google.com --- drivers/pci/pcie/dpc.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e435d12..7b77754 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -202,6 +202,28 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc) pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); }
+static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
+					  struct aer_err_info *info)
+{
+	int pos = dev->aer_cap;
+	u32 status, mask, sev;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
+	status &= ~mask;
+	if (!status)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		info->severity = AER_FATAL;
+	else
+		info->severity = AER_NONFATAL;
+
+	return 1;
+}
+
 static irqreturn_t dpc_handler(int irq, void *context)
 {
 	struct aer_err_info info;
@@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context)
 	/* show RP PIO error detail information */
 	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
 		dpc_process_rp_pio_error(dpc);
-	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
+	else if (reason == 0 &&
+		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
+		 aer_get_device_error_info(pdev, &info)) {
 		aer_print_error(pdev, &info);
 		pci_cleanup_aer_uncorrect_error_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
 	}
/* We configure DPC so it only triggers on ERR_FATAL */
On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
+static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
+					  struct aer_err_info *info)
+{
+	int pos = dev->aer_cap;
+	u32 status, mask, sev;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
+	status &= ~mask;
+	if (!status)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		info->severity = AER_FATAL;
+	else
+		info->severity = AER_NONFATAL;
+
+	return 1;
+}
You can set info->severity to AER_FATAL since that's the only type we enable DPC triggering.
static irqreturn_t dpc_handler(int irq, void *context) { struct aer_err_info info; @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context) /* show RP PIO error detail information */ if (dpc->rp_extensions && reason == 3 && ext_reason == 0) dpc_process_rp_pio_error(dpc);
-	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
+	else if (reason == 0 &&
+		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
+		 aer_get_device_error_info(pdev, &info)) {
 		aer_print_error(pdev, &info);
 		pci_cleanup_aer_uncorrect_error_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
Good catch here, but let's clear the pending bits with a single call to pci_cleanup_aer_error_status_regs() rather than NONFATAL and FATAL separately.
Hi Keith
Many thanks for your review.
在 2019/2/11 23:46, Keith Busch 写道:
On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
+static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
+					  struct aer_err_info *info)
+{
+	int pos = dev->aer_cap;
+	u32 status, mask, sev;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
+	status &= ~mask;
+	if (!status)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		info->severity = AER_FATAL;
+	else
+		info->severity = AER_NONFATAL;
+
+	return 1;
+}
You can set info->severity to AER_FATAL since that's the only type we enable DPC triggering.
DPC Trigger Enable 01b-DPC is enabled and is triggered when the Downstream Port detects an unmasked uncorrectable error or when the Downstream Port receives an ERR_FATAL Message.
DPC Trigger Reason 00b - DPC was triggered due to an unmasked uncorrectable error. So reason == 0 means an unmasked uncorrectable error was detected, which includes both non-fatal and fatal errors, so we need to get the severity.
static irqreturn_t dpc_handler(int irq, void *context) { struct aer_err_info info; @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context) /* show RP PIO error detail information */ if (dpc->rp_extensions && reason == 3 && ext_reason == 0) dpc_process_rp_pio_error(dpc);
-	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
+	else if (reason == 0 &&
+		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
+		 aer_get_device_error_info(pdev, &info)) {
 		aer_print_error(pdev, &info);
 		pci_cleanup_aer_uncorrect_error_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
Good catch here, but let's clear the pending bits with a single call to pci_cleanup_aer_error_status_regs() rather than NONFATAL and FATAL separately.
pci_cleanup_aer_error_status_regs() also clears the correctable error status. That seems not ideal, as reason == 0 means an unmasked uncorrectable error was detected.
Thanks, Dongdong
.
On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
Currently info->severity is not assigned a value before aer_get_device_error_info() and aer_print_error() are called. Fix the bug by getting the severity from the port's AER status, mask and severity registers. At the same time, add code to clear the port's fatal errors.
Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling") Signed-off-by: Dongdong Liu liudongdong3@huawei.com Cc: stable@vger.kernel.org Cc: Keith Busch keith.busch@intel.com Cc: Bjorn Helgaas bhelgaas@google.com
Looks good.
Reviewed-by: Keith Busch keith.busch@intel.com
drivers/pci/pcie/dpc.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e435d12..7b77754 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -202,6 +202,28 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc) pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); } +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
+					  struct aer_err_info *info)
+{
+	int pos = dev->aer_cap;
+	u32 status, mask, sev;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
+	status &= ~mask;
+	if (!status)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		info->severity = AER_FATAL;
+	else
+		info->severity = AER_NONFATAL;
+
+	return 1;
+}
static irqreturn_t dpc_handler(int irq, void *context) { struct aer_err_info info; @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context) /* show RP PIO error detail information */ if (dpc->rp_extensions && reason == 3 && ext_reason == 0) dpc_process_rp_pio_error(dpc);
-	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
+	else if (reason == 0 &&
+		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
+		 aer_get_device_error_info(pdev, &info)) {
 		aer_print_error(pdev, &info);
 		pci_cleanup_aer_uncorrect_error_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
 	}
/* We configure DPC so it only triggers on ERR_FATAL */ -- 1.9.1
On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
Currently info->severity is not assigned a value before aer_get_device_error_info() and aer_print_error() are called. Fix the bug by getting the severity from the port's AER status, mask and severity registers. At the same time, add code to clear the port's fatal errors.
Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling") Signed-off-by: Dongdong Liu liudongdong3@huawei.com Cc: stable@vger.kernel.org Cc: Keith Busch keith.busch@intel.com Cc: Bjorn Helgaas bhelgaas@google.com
Applied to pci/dpc for v5.1, thanks!
drivers/pci/pcie/dpc.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e435d12..7b77754 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -202,6 +202,28 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc) pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); } +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
+					  struct aer_err_info *info)
+{
+	int pos = dev->aer_cap;
+	u32 status, mask, sev;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
+	status &= ~mask;
+	if (!status)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		info->severity = AER_FATAL;
+	else
+		info->severity = AER_NONFATAL;
+
+	return 1;
+}
static irqreturn_t dpc_handler(int irq, void *context) { struct aer_err_info info; @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context) /* show RP PIO error detail information */ if (dpc->rp_extensions && reason == 3 && ext_reason == 0) dpc_process_rp_pio_error(dpc);
-	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
+	else if (reason == 0 &&
+		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
+		 aer_get_device_error_info(pdev, &info)) {
 		aer_print_error(pdev, &info);
 		pci_cleanup_aer_uncorrect_error_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
 	}
/* We configure DPC so it only triggers on ERR_FATAL */ -- 1.9.1
linux-stable-mirror@lists.linaro.org