The cros_ec_uart_probe() function calls devm_serdev_device_open() before it calls serdev_device_set_client_ops(). This can trigger a NULL pointer dereference:
BUG: kernel NULL pointer dereference, address: 0000000000000000 ... CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024 Workqueue: events_unbound flush_to_ldisc RIP: 0010:ttyport_receive_buf+0x3f/0xf0 ... Call Trace: <TASK> ? __die+0x10f/0x120 ? page_fault_oops+0x171/0x4e0 ? srso_return_thunk+0x5/0x5f ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? ttyport_receive_buf+0x3f/0xf0 flush_to_ldisc+0x9b/0x1c0 process_one_work+0x17b/0x340 worker_thread+0x301/0x490 ? __pfx_worker_thread+0x10/0x10 kthread+0xe8/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 </TASK>
A simplified version of crashing code is as follows:
static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl, const u8 *data, size_t count) { struct serdev_device *serdev = ctrl->serdev;
if (!serdev || !serdev->ops->receive_buf) // CRASH! return 0;
return serdev->ops->receive_buf(serdev, data, count); }
static size_t ttyport_receive_buf(struct tty_port *port, const u8 *cp, const u8 *fp, size_t count) { struct serdev_controller *ctrl = port->client_data; [...]
if (!test_bit(SERPORT_ACTIVE, &serport->flags)) return 0;
ret = serdev_controller_receive_buf(ctrl, cp, count);
[...] return ret; }
It assumes that if SERPORT_ACTIVE is set and serdev exists, serdev->ops will also exist. This conflicts with the existing cros_ec_uart_probe() logic, as it first calls devm_serdev_device_open() (which sets SERPORT_ACTIVE), and only later sets serdev->ops via serdev_device_set_client_ops().
Commit 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race condition") attempted to fix a similar race condition, but while doing so, made the window of error for this race condition to happen much wider.
Attempt to fix the race condition again, making sure we fully setup before calling devm_serdev_device_open().
Fixes: 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race condition") Cc: stable@vger.kernel.org Signed-off-by: Noah Loomans noah@noahloomans.com ---
This is my first time contributing to Linux, I hope this is a good patch. Feedback on how to improve is welcome!
drivers/platform/chrome/cros_ec_uart.c | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c index 8ea867c2a01a..62bc24f6dcc7 100644 --- a/drivers/platform/chrome/cros_ec_uart.c +++ b/drivers/platform/chrome/cros_ec_uart.c @@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev) if (!ec_dev) return -ENOMEM;
- ret = devm_serdev_device_open(dev, serdev); - if (ret) { - dev_err(dev, "Unable to open UART device"); - return ret; - } - serdev_device_set_drvdata(serdev, ec_dev); init_waitqueue_head(&ec_uart->response.wait_queue);
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev) return ret; }
- ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate); - if (ret < 0) { - dev_err(dev, "Failed to set up host baud rate (%d)", ret); - return ret; - } - - serdev_device_set_flow_control(serdev, ec_uart->flowcontrol); - /* Initialize ec_dev for cros_ec */ ec_dev->phys_name = dev_name(dev); ec_dev->dev = dev; @@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
+ ret = devm_serdev_device_open(dev, serdev); + if (ret) { + dev_err(dev, "Unable to open UART device"); + return ret; + } + + ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate); + if (ret < 0) { + dev_err(dev, "Failed to set up host baud rate (%d)", ret); + return ret; + } + + serdev_device_set_flow_control(serdev, ec_uart->flowcontrol); + return cros_ec_register(ec_dev); }
On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans noah@noahloomans.com wrote:
The cros_ec_uart_probe() function calls devm_serdev_device_open() before it calls serdev_device_set_client_ops(). This can trigger a NULL pointer dereference:
BUG: kernel NULL pointer dereference, address: 0000000000000000 ... CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024 Workqueue: events_unbound flush_to_ldisc RIP: 0010:ttyport_receive_buf+0x3f/0xf0 ... Call Trace: <TASK> ? __die+0x10f/0x120 ? page_fault_oops+0x171/0x4e0 ? srso_return_thunk+0x5/0x5f ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? ttyport_receive_buf+0x3f/0xf0 flush_to_ldisc+0x9b/0x1c0 process_one_work+0x17b/0x340 worker_thread+0x301/0x490 ? __pfx_worker_thread+0x10/0x10 kthread+0xe8/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 </TASK>
A simplified version of crashing code is as follows:
static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl, const u8 *data, size_t count) { struct serdev_device *serdev = ctrl->serdev; if (!serdev || !serdev->ops->receive_buf) // CRASH! return 0; return serdev->ops->receive_buf(serdev, data, count); } static size_t ttyport_receive_buf(struct tty_port *port, const u8 *cp, const u8 *fp, size_t count) { struct serdev_controller *ctrl = port->client_data; [...] if (!test_bit(SERPORT_ACTIVE, &serport->flags)) return 0; ret = serdev_controller_receive_buf(ctrl, cp, count); [...] return ret; }
It assumes that if SERPORT_ACTIVE is set and serdev exists, serdev->ops will also exist. This conflicts with the existing cros_ec_uart_probe() logic, as it first calls devm_serdev_device_open() (which sets SERPORT_ACTIVE), and only later sets serdev->ops via serdev_device_set_client_ops().
Commit 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race condition") attempted to fix a similar race condition, but while doing so, made the window of error for this race condition to happen much wider.
Attempt to fix the race condition again, making sure we fully setup before calling devm_serdev_device_open().
Fixes: 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race condition") Cc: stable@vger.kernel.org Signed-off-by: Noah Loomans noah@noahloomans.com
This is my first time contributing to Linux, I hope this is a good patch. Feedback on how to improve is welcome!
The commit message is a bit long, but the patch itself looks good to me.
Reviewed-by: Guenter Roeck groeck@chromium.org
Guenter
drivers/platform/chrome/cros_ec_uart.c | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c index 8ea867c2a01a..62bc24f6dcc7 100644 --- a/drivers/platform/chrome/cros_ec_uart.c +++ b/drivers/platform/chrome/cros_ec_uart.c @@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev) if (!ec_dev) return -ENOMEM;
ret = devm_serdev_device_open(dev, serdev);
if (ret) {
dev_err(dev, "Unable to open UART device");
return ret;
}
serdev_device_set_drvdata(serdev, ec_dev); init_waitqueue_head(&ec_uart->response.wait_queue);
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev) return ret; }
ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
if (ret < 0) {
dev_err(dev, "Failed to set up host baud rate (%d)", ret);
return ret;
}
serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
/* Initialize ec_dev for cros_ec */ ec_dev->phys_name = dev_name(dev); ec_dev->dev = dev;
@@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
ret = devm_serdev_device_open(dev, serdev);
if (ret) {
dev_err(dev, "Unable to open UART device");
return ret;
}
ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
if (ret < 0) {
dev_err(dev, "Failed to set up host baud rate (%d)", ret);
return ret;
}
serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
return cros_ec_register(ec_dev);
}
-- 2.44.0
On 2024-04-10 at 21:48 UTC+02, Guenter Roeck wrote:
On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans noah@noahloomans.com wrote:
This is my first time contributing to Linux, I hope this is a good patch. Feedback on how to improve is welcome!
The commit message is a bit long, but the patch itself looks good to me.
Hmm yeah it's a bit on a long side. I'm not sure what could be removed though, it all seems relevant for understanding the bug and the fix.
Reviewed-by: Guenter Roeck groeck@chromium.org
Thanks!
-Noah
On Thu, Apr 11, 2024 at 12:06:33AM +0200, Noah Loomans wrote:
On 2024-04-10 at 21:48 UTC+02, Guenter Roeck wrote:
On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans noah@noahloomans.com wrote:
This is my first time contributing to Linux, I hope this is a good patch. Feedback on how to improve is welcome!
The commit message is a bit long, but the patch itself looks good to me.
Hmm yeah it's a bit on a long side. I'm not sure what could be removed though, it all seems relevant for understanding the bug and the fix.
Applied with shortening the message slightly.
On Wed, Apr 10, 2024 at 7:18 PM Tzung-Bi Shih tzungbi@kernel.org wrote:
On Thu, Apr 11, 2024 at 12:06:33AM +0200, Noah Loomans wrote:
On 2024-04-10 at 21:48 UTC+02, Guenter Roeck wrote:
On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans noah@noahloomans.com wrote:
This is my first time contributing to Linux, I hope this is a good patch. Feedback on how to improve is welcome!
The commit message is a bit long, but the patch itself looks good to me.
Hmm yeah it's a bit on a long side. I'm not sure what could be removed though, it all seems relevant for understanding the bug and the fix.
Applied with shortening the message slightly.
We might also consider applying the patch to all ChromeOS branches directly (not waiting for upstream); we do see a number of crashes because of it.
Guenter
On 2024-04-11 at 04:26 UTC+02, Guenter Roeck wrote:
We might also consider applying the patch to all ChromeOS branches directly (not waiting for upstream); we do see a number of crashes because of it.
Heh, I did wonder how Chromebooks running Chrome OS avoided this issue. I guess the answer is simple: they didn't :)
Hello:
This patch was applied to chrome-platform/linux.git (for-kernelci) by Tzung-Bi Shih tzungbi@kernel.org:
On Wed, 10 Apr 2024 20:26:19 +0200 you wrote:
The cros_ec_uart_probe() function calls devm_serdev_device_open() before it calls serdev_device_set_client_ops(). This can trigger a NULL pointer dereference:
BUG: kernel NULL pointer dereference, address: 0000000000000000 ... CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024 Workqueue: events_unbound flush_to_ldisc RIP: 0010:ttyport_receive_buf+0x3f/0xf0 ... Call Trace: <TASK> ? __die+0x10f/0x120 ? page_fault_oops+0x171/0x4e0 ? srso_return_thunk+0x5/0x5f ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? ttyport_receive_buf+0x3f/0xf0 flush_to_ldisc+0x9b/0x1c0 process_one_work+0x17b/0x340 worker_thread+0x301/0x490 ? __pfx_worker_thread+0x10/0x10 kthread+0xe8/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 </TASK>
[...]
Here is the summary with links: - platform/chrome: cros_ec_uart: properly fix race condition https://git.kernel.org/chrome-platform/c/5e700b384ec1
You are awesome, thank you!
Hello:
This patch was applied to chrome-platform/linux.git (for-next) by Tzung-Bi Shih tzungbi@kernel.org:
On Wed, 10 Apr 2024 20:26:19 +0200 you wrote:
The cros_ec_uart_probe() function calls devm_serdev_device_open() before it calls serdev_device_set_client_ops(). This can trigger a NULL pointer dereference:
BUG: kernel NULL pointer dereference, address: 0000000000000000 ... CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024 Workqueue: events_unbound flush_to_ldisc RIP: 0010:ttyport_receive_buf+0x3f/0xf0 ... Call Trace: <TASK> ? __die+0x10f/0x120 ? page_fault_oops+0x171/0x4e0 ? srso_return_thunk+0x5/0x5f ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? ttyport_receive_buf+0x3f/0xf0 flush_to_ldisc+0x9b/0x1c0 process_one_work+0x17b/0x340 worker_thread+0x301/0x490 ? __pfx_worker_thread+0x10/0x10 kthread+0xe8/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 </TASK>
[...]
Here is the summary with links: - platform/chrome: cros_ec_uart: properly fix race condition https://git.kernel.org/chrome-platform/c/5e700b384ec1
You are awesome, thank you!
linux-stable-mirror@lists.linaro.org