NULL pointer exception happens occasionally on serial output initiated by login timeout. This was reproduced only if kernel was built with significant debugging options and EDMA driver is used with serial console.
col-vf50 login: root Password: Login timed out after 60 seconds. Unable to handle kernel NULL pointer dereference at virtual address 00000044 Internal error: Oops: 5 [#1] ARM CPU: 0 PID: 157 Comm: login Not tainted 5.7.0-next-20200610-dirty #4 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (fsl_edma_tx_handler) from [<8016eb10>] (__handle_irq_event_percpu+0x64/0x304) (__handle_irq_event_percpu) from [<8016eddc>] (handle_irq_event_percpu+0x2c/0x7c) (handle_irq_event_percpu) from [<8016ee64>] (handle_irq_event+0x38/0x5c) (handle_irq_event) from [<801729e4>] (handle_fasteoi_irq+0xa4/0x160) (handle_fasteoi_irq) from [<8016ddcc>] (generic_handle_irq+0x34/0x44) (generic_handle_irq) from [<8016e40c>] (__handle_domain_irq+0x54/0xa8) (__handle_domain_irq) from [<80508bc8>] (gic_handle_irq+0x4c/0x80) (gic_handle_irq) from [<80100af0>] (__irq_svc+0x70/0x98) Exception stack(0x8459fe80 to 0x8459fec8) fe80: 72286b00 e3359f64 00000001 0000412d a0070013 85c98840 85c98840 a0070013 fea0: 8054e0d4 00000000 00000002 00000000 00000002 8459fed0 8081fbe8 8081fbec fec0: 60070013 ffffffff (__irq_svc) from [<8081fbec>] (_raw_spin_unlock_irqrestore+0x30/0x58) (_raw_spin_unlock_irqrestore) from [<8056cb48>] (uart_flush_buffer+0x88/0xf8) (uart_flush_buffer) from [<80554e60>] (tty_ldisc_hangup+0x38/0x1ac) (tty_ldisc_hangup) from [<8054c7f4>] (__tty_hangup+0x158/0x2bc) (__tty_hangup) from [<80557b90>] (disassociate_ctty.part.1+0x30/0x23c) (disassociate_ctty.part.1) from [<8011fc18>] (do_exit+0x580/0xba0) (do_exit) from [<801214f8>] (do_group_exit+0x3c/0xb4) (do_group_exit) from [<80121580>] (__wake_up_parent+0x0/0x14)
Issue looks like race condition between interrupt handler fsl_edma_tx_handler() (called as result of fsl_edma_xfer_desc()) and terminating the transfer with fsl_edma_terminate_all().
The fsl_edma_tx_handler() handles interrupt for a transfer with already freed edesc and idle==true.
Fixes: d6be34fbd39b ("dma: Add Freescale eDMA engine driver support") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski krzk@kernel.org --- drivers/dma/fsl-edma.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index eff7ebd8cf35..90bb72af306c 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -45,6 +45,13 @@ static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id) fsl_chan = &fsl_edma->chans[ch];
spin_lock(&fsl_chan->vchan.lock); + + if (!fsl_chan->edesc) { + /* terminate_all called before */ + spin_unlock(&fsl_chan->vchan.lock); + continue; + } + if (!fsl_chan->edesc->iscyclic) { list_del(&fsl_chan->edesc->vdesc.node); vchan_cookie_complete(&fsl_chan->edesc->vdesc);
On 2020/06/11 20:18 Krzysztof Kozlowski krzk@kernel.org wrote:
NULL pointer exception happens occasionally on serial output initiated by login timeout. This was reproduced only if kernel was built with significant debugging options and EDMA driver is used with serial console.
col-vf50 login: root Password: Login timed out after 60 seconds. Unable to handle kernel NULL pointer dereference at virtual address
00000044 Internal error: Oops: 5 [#1] ARM CPU: 0 PID: 157 Comm: login Not tainted 5.7.0-next-20200610-dirty #4 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (fsl_edma_tx_handler) from [<8016eb10>] (__handle_irq_event_percpu+0x64/0x304) (__handle_irq_event_percpu) from [<8016eddc>] (handle_irq_event_percpu+0x2c/0x7c) (handle_irq_event_percpu) from [<8016ee64>] (handle_irq_event+0x38/0x5c) (handle_irq_event) from [<801729e4>] (handle_fasteoi_irq+0xa4/0x160) (handle_fasteoi_irq) from [<8016ddcc>] (generic_handle_irq+0x34/0x44) (generic_handle_irq) from [<8016e40c>] (__handle_domain_irq+0x54/0xa8) (__handle_domain_irq) from [<80508bc8>] (gic_handle_irq+0x4c/0x80) (gic_handle_irq) from [<80100af0>] (__irq_svc+0x70/0x98) Exception stack(0x8459fe80 to 0x8459fec8) fe80: 72286b00 e3359f64 00000001 0000412d a0070013 85c98840 85c98840 a0070013 fea0: 8054e0d4 00000000 00000002 00000000 00000002 8459fed0 8081fbe8 8081fbec fec0: 60070013 ffffffff (__irq_svc) from [<8081fbec>] (_raw_spin_unlock_irqrestore+0x30/0x58) (_raw_spin_unlock_irqrestore) from [<8056cb48>] (uart_flush_buffer+0x88/0xf8) (uart_flush_buffer) from [<80554e60>] (tty_ldisc_hangup+0x38/0x1ac) (tty_ldisc_hangup) from [<8054c7f4>] (__tty_hangup+0x158/0x2bc) (__tty_hangup) from [<80557b90>] (disassociate_ctty.part.1+0x30/0x23c) (disassociate_ctty.part.1) from [<8011fc18>] (do_exit+0x580/0xba0) (do_exit) from [<801214f8>] (do_group_exit+0x3c/0xb4) (do_group_exit) from [<80121580>] (__wake_up_parent+0x0/0x14)
Issue looks like race condition between interrupt handler fsl_edma_tx_handler() (called as result of fsl_edma_xfer_desc()) and terminating the transfer with fsl_edma_terminate_all().
The fsl_edma_tx_handler() handles interrupt for a transfer with already freed edesc and idle==true.
Fixes: d6be34fbd39b ("dma: Add Freescale eDMA engine driver support") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski krzk@kernel.org
drivers/dma/fsl-edma.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index eff7ebd8cf35..90bb72af306c 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -45,6 +45,13 @@ static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id) fsl_chan = &fsl_edma->chans[ch];
spin_lock(&fsl_chan->vchan.lock);
if (!fsl_chan->edesc) {
Would you like fix the same potential issue in mcf_edma_tx_handler() of mcf-edma.c?
/* terminate_all called before */
spin_unlock(&fsl_chan->vchan.lock);
continue;
}
if (!fsl_chan->edesc->iscyclic) { list_del(&fsl_chan->edesc->vdesc.node); vchan_cookie_complete(&fsl_chan->edesc->vdesc);
-- 2.7.4
On Thu, Jun 11, 2020 at 01:04:43PM +0000, Robin Gong wrote:
On 2020/06/11 20:18 Krzysztof Kozlowski krzk@kernel.org wrote:
NULL pointer exception happens occasionally on serial output initiated by login timeout. This was reproduced only if kernel was built with significant debugging options and EDMA driver is used with serial console.
col-vf50 login: root Password: Login timed out after 60 seconds. Unable to handle kernel NULL pointer dereference at virtual address
00000044 Internal error: Oops: 5 [#1] ARM CPU: 0 PID: 157 Comm: login Not tainted 5.7.0-next-20200610-dirty #4 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (fsl_edma_tx_handler) from [<8016eb10>] (__handle_irq_event_percpu+0x64/0x304) (__handle_irq_event_percpu) from [<8016eddc>] (handle_irq_event_percpu+0x2c/0x7c) (handle_irq_event_percpu) from [<8016ee64>] (handle_irq_event+0x38/0x5c) (handle_irq_event) from [<801729e4>] (handle_fasteoi_irq+0xa4/0x160) (handle_fasteoi_irq) from [<8016ddcc>] (generic_handle_irq+0x34/0x44) (generic_handle_irq) from [<8016e40c>] (__handle_domain_irq+0x54/0xa8) (__handle_domain_irq) from [<80508bc8>] (gic_handle_irq+0x4c/0x80) (gic_handle_irq) from [<80100af0>] (__irq_svc+0x70/0x98) Exception stack(0x8459fe80 to 0x8459fec8) fe80: 72286b00 e3359f64 00000001 0000412d a0070013 85c98840 85c98840 a0070013 fea0: 8054e0d4 00000000 00000002 00000000 00000002 8459fed0 8081fbe8 8081fbec fec0: 60070013 ffffffff (__irq_svc) from [<8081fbec>] (_raw_spin_unlock_irqrestore+0x30/0x58) (_raw_spin_unlock_irqrestore) from [<8056cb48>] (uart_flush_buffer+0x88/0xf8) (uart_flush_buffer) from [<80554e60>] (tty_ldisc_hangup+0x38/0x1ac) (tty_ldisc_hangup) from [<8054c7f4>] (__tty_hangup+0x158/0x2bc) (__tty_hangup) from [<80557b90>] (disassociate_ctty.part.1+0x30/0x23c) (disassociate_ctty.part.1) from [<8011fc18>] (do_exit+0x580/0xba0) (do_exit) from [<801214f8>] (do_group_exit+0x3c/0xb4) (do_group_exit) from [<80121580>] (__wake_up_parent+0x0/0x14)
Issue looks like race condition between interrupt handler fsl_edma_tx_handler() (called as result of fsl_edma_xfer_desc()) and terminating the transfer with fsl_edma_terminate_all().
The fsl_edma_tx_handler() handles interrupt for a transfer with already freed edesc and idle==true.
Fixes: d6be34fbd39b ("dma: Add Freescale eDMA engine driver support") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski krzk@kernel.org
drivers/dma/fsl-edma.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index eff7ebd8cf35..90bb72af306c 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -45,6 +45,13 @@ static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id) fsl_chan = &fsl_edma->chans[ch];
spin_lock(&fsl_chan->vchan.lock);
if (!fsl_chan->edesc) {
Would you like fix the same potential issue in mcf_edma_tx_handler() of mcf-edma.c?
Sure. I'll make another commit as it should be backported to different kernel.
Best regards, Krzysztof
On Toradex Colibri VF50 (Vybrid VF5xx) with fsl-edma driver NULL pointer exception happens occasionally on serial output initiated by login timeout.
This was reproduced only if kernel was built with significant debugging options and EDMA driver is used with serial console.
Issue looks like a race condition between interrupt handler fsl_edma_tx_handler() (called as a result of fsl_edma_xfer_desc()) and terminating the transfer with fsl_edma_terminate_all().
The fsl_edma_tx_handler() handles interrupt for a transfer with already freed edesc and idle==true.
The mcf-edma driver shares design and lot of code with fsl-edma. It looks like being affected by same problem. Fix this pattern the same way as fix for fsl-edma driver.
Fixes: e7a3ff92eaf1 ("dmaengine: fsl-edma: add ColdFire mcf5441x edma support") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski krzk@kernel.org
---
Not tested on HW. --- drivers/dma/mcf-edma.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/dma/mcf-edma.c b/drivers/dma/mcf-edma.c index e15bd15a9ef6..e12b754e6398 100644 --- a/drivers/dma/mcf-edma.c +++ b/drivers/dma/mcf-edma.c @@ -35,6 +35,13 @@ static irqreturn_t mcf_edma_tx_handler(int irq, void *dev_id) mcf_chan = &mcf_edma->chans[ch];
spin_lock(&mcf_chan->vchan.lock); + + if (!mcf_chan->edesc) { + /* terminate_all called before */ + spin_unlock(&mcf_chan->vchan.lock); + continue; + } + if (!mcf_chan->edesc->iscyclic) { list_del(&mcf_chan->edesc->vdesc.node); vchan_cookie_complete(&mcf_chan->edesc->vdesc);
On 2020/06/11 Krzysztof Kozlowski krzk@kernel.org wrote:
On Toradex Colibri VF50 (Vybrid VF5xx) with fsl-edma driver NULL pointer exception happens occasionally on serial output initiated by login timeout.
This was reproduced only if kernel was built with significant debugging options and EDMA driver is used with serial console.
Issue looks like a race condition between interrupt handler fsl_edma_tx_handler() (called as a result of fsl_edma_xfer_desc()) and terminating the transfer with fsl_edma_terminate_all().
The fsl_edma_tx_handler() handles interrupt for a transfer with already freed edesc and idle==true.
The mcf-edma driver shares design and lot of code with fsl-edma. It looks like being affected by same problem. Fix this pattern the same way as fix for fsl-edma driver.
Fixes: e7a3ff92eaf1 ("dmaengine: fsl-edma: add ColdFire mcf5441x edma support") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski krzk@kernel.org
Reviewed-by: Robin Gong yibin.gong@nxp.com
Not tested on HW.
drivers/dma/mcf-edma.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/dma/mcf-edma.c b/drivers/dma/mcf-edma.c index e15bd15a9ef6..e12b754e6398 100644 --- a/drivers/dma/mcf-edma.c +++ b/drivers/dma/mcf-edma.c @@ -35,6 +35,13 @@ static irqreturn_t mcf_edma_tx_handler(int irq, void *dev_id) mcf_chan = &mcf_edma->chans[ch];
spin_lock(&mcf_chan->vchan.lock);
if (!mcf_chan->edesc) {
/* terminate_all called before */
spin_unlock(&mcf_chan->vchan.lock);
continue;
}
if (!mcf_chan->edesc->iscyclic) { list_del(&mcf_chan->edesc->vdesc.node); vchan_cookie_complete(&mcf_chan->edesc->vdesc);
-- 2.7.4
On 2020/06/11 20:18 Krzysztof Kozlowski krzk@kernel.org wrote:
NULL pointer exception happens occasionally on serial output initiated by login timeout. This was reproduced only if kernel was built with significant debugging options and EDMA driver is used with serial console.
col-vf50 login: root Password: Login timed out after 60 seconds. Unable to handle kernel NULL pointer dereference at virtual address
00000044 Internal error: Oops: 5 [#1] ARM CPU: 0 PID: 157 Comm: login Not tainted 5.7.0-next-20200610-dirty #4 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (fsl_edma_tx_handler) from [<8016eb10>] (__handle_irq_event_percpu+0x64/0x304) (__handle_irq_event_percpu) from [<8016eddc>] (handle_irq_event_percpu+0x2c/0x7c) (handle_irq_event_percpu) from [<8016ee64>] (handle_irq_event+0x38/0x5c) (handle_irq_event) from [<801729e4>] (handle_fasteoi_irq+0xa4/0x160) (handle_fasteoi_irq) from [<8016ddcc>] (generic_handle_irq+0x34/0x44) (generic_handle_irq) from [<8016e40c>] (__handle_domain_irq+0x54/0xa8) (__handle_domain_irq) from [<80508bc8>] (gic_handle_irq+0x4c/0x80) (gic_handle_irq) from [<80100af0>] (__irq_svc+0x70/0x98) Exception stack(0x8459fe80 to 0x8459fec8) fe80: 72286b00 e3359f64 00000001 0000412d a0070013 85c98840 85c98840 a0070013 fea0: 8054e0d4 00000000 00000002 00000000 00000002 8459fed0 8081fbe8 8081fbec fec0: 60070013 ffffffff (__irq_svc) from [<8081fbec>] (_raw_spin_unlock_irqrestore+0x30/0x58) (_raw_spin_unlock_irqrestore) from [<8056cb48>] (uart_flush_buffer+0x88/0xf8) (uart_flush_buffer) from [<80554e60>] (tty_ldisc_hangup+0x38/0x1ac) (tty_ldisc_hangup) from [<8054c7f4>] (__tty_hangup+0x158/0x2bc) (__tty_hangup) from [<80557b90>] (disassociate_ctty.part.1+0x30/0x23c) (disassociate_ctty.part.1) from [<8011fc18>] (do_exit+0x580/0xba0) (do_exit) from [<801214f8>] (do_group_exit+0x3c/0xb4) (do_group_exit) from [<80121580>] (__wake_up_parent+0x0/0x14)
Issue looks like race condition between interrupt handler fsl_edma_tx_handler() (called as result of fsl_edma_xfer_desc()) and terminating the transfer with fsl_edma_terminate_all().
The fsl_edma_tx_handler() handles interrupt for a transfer with already freed edesc and idle==true.
Fixes: d6be34fbd39b ("dma: Add Freescale eDMA engine driver support") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski krzk@kernel.org
drivers/dma/fsl-edma.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index eff7ebd8cf35..90bb72af306c 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -45,6 +45,13 @@ static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id) fsl_chan = &fsl_edma->chans[ch];
spin_lock(&fsl_chan->vchan.lock);
if (!fsl_chan->edesc) {
/* terminate_all called before */
spin_unlock(&fsl_chan->vchan.lock);
continue;
}
Reviewed-by: Robin Gong yibin.gong@nxp.com
if (!fsl_chan->edesc->iscyclic) { list_del(&fsl_chan->edesc->vdesc.node); vchan_cookie_complete(&fsl_chan->edesc->vdesc);
-- 2.7.4
linux-stable-mirror@lists.linaro.org