These are all fixes for the frozen notification patch [1], which as of today hasn't landed in mainline yet. As such, this patchset is rebased on top of the char-misc-next branch.
[1] https://lore.kernel.org/all/20240709070047.4055369-2-yutingtseng@google.com/
Cc: stable@vger.kernel.org
Cc: Yu-Ting Tseng <yutingtseng@google.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Todd Kjos <tkjos@google.com>
Cc: Martijn Coenen <maco@google.com>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Viktor Martensson <vmartensson@google.com>

Carlos Llamas (4):
  binder: fix node UAF in binder_add_freeze_work()
  binder: fix OOB in binder_add_freeze_work()
  binder: fix freeze UAF in binder_release_work()
  binder: fix BINDER_WORK_FROZEN_BINDER debug logs

 drivers/android/binder.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped in order to acquire the node->lock first (lock nesting order). This can race with binder_node_release() and trigger a use-after-free:
==================================================================
BUG: KASAN: slab-use-after-free in _raw_spin_lock+0xe4/0x19c
Write of size 4 at addr ffff53c04c29dd04 by task freeze/640

CPU: 5 UID: 0 PID: 640 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #17
Hardware name: linux,dummy-virt (DT)
Call trace:
 _raw_spin_lock+0xe4/0x19c
 binder_add_freeze_work+0x148/0x478
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

Allocated by task 637:
 __kmalloc_cache_noprof+0x12c/0x27c
 binder_new_node+0x50/0x700
 binder_transaction+0x35ac/0x6f74
 binder_thread_write+0xfb8/0x42a0
 binder_ioctl+0x18f0/0x25ac
 __arm64_sys_ioctl+0x124/0x190

Freed by task 637:
 kfree+0xf0/0x330
 binder_thread_read+0x1e88/0x3a68
 binder_ioctl+0x16d8/0x25ac
 __arm64_sys_ioctl+0x124/0x190
==================================================================
Fix the race by taking a temporary reference on the node before releasing the proc->inner lock. This ensures the node remains alive while in use.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>
---
 drivers/android/binder.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 978740537a1a..4d90203ea048 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5552,6 +5552,7 @@ static bool binder_txns_pending_ilocked(struct binder_proc *proc)
 
 static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 {
+	struct binder_node *prev = NULL;
 	struct rb_node *n;
 	struct binder_ref *ref;
 
@@ -5560,7 +5561,10 @@ static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 		struct binder_node *node;
 
 		node = rb_entry(n, struct binder_node, rb_node);
+		binder_inc_node_tmpref_ilocked(node);
 		binder_inner_proc_unlock(proc);
+		if (prev)
+			binder_put_node(prev);
 		binder_node_lock(node);
 		hlist_for_each_entry(ref, &node->refs, node_entry) {
 			/*
@@ -5586,10 +5590,13 @@ static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 			}
 			binder_inner_proc_unlock(ref->proc);
 		}
+		prev = node;
 		binder_node_unlock(node);
 		binder_inner_proc_lock(proc);
 	}
 	binder_inner_proc_unlock(proc);
+	if (prev)
+		binder_put_node(prev);
 }
 
 static int binder_ioctl_freeze(struct binder_freeze_info *info,
On Tue, Sep 24, 2024 at 11:44 AM Carlos Llamas cmllamas@google.com wrote:
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped in order to acquire the node->lock first (lock nesting order). This can race with binder_node_release() and trigger a use-after-free:
==================================================================
BUG: KASAN: slab-use-after-free in _raw_spin_lock+0xe4/0x19c
Write of size 4 at addr ffff53c04c29dd04 by task freeze/640

CPU: 5 UID: 0 PID: 640 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #17
Hardware name: linux,dummy-virt (DT)
Call trace:
 _raw_spin_lock+0xe4/0x19c
 binder_add_freeze_work+0x148/0x478
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

Allocated by task 637:
 __kmalloc_cache_noprof+0x12c/0x27c
 binder_new_node+0x50/0x700
 binder_transaction+0x35ac/0x6f74
 binder_thread_write+0xfb8/0x42a0
 binder_ioctl+0x18f0/0x25ac
 __arm64_sys_ioctl+0x124/0x190

Freed by task 637:
 kfree+0xf0/0x330
 binder_thread_read+0x1e88/0x3a68
 binder_ioctl+0x16d8/0x25ac
 __arm64_sys_ioctl+0x124/0x190
==================================================================
Fix the race by taking a temporary reference on the node before releasing the proc->inner lock. This ensures the node remains alive while in use.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

Acked-by: Todd Kjos <tkjos@google.com>

 drivers/android/binder.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 978740537a1a..4d90203ea048 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5552,6 +5552,7 @@ static bool binder_txns_pending_ilocked(struct binder_proc *proc)
 
 static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 {
+	struct binder_node *prev = NULL;
 	struct rb_node *n;
 	struct binder_ref *ref;
 
@@ -5560,7 +5561,10 @@ static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 		struct binder_node *node;
 
 		node = rb_entry(n, struct binder_node, rb_node);
+		binder_inc_node_tmpref_ilocked(node);
 		binder_inner_proc_unlock(proc);
+		if (prev)
+			binder_put_node(prev);
 		binder_node_lock(node);
 		hlist_for_each_entry(ref, &node->refs, node_entry) {
 			/*
@@ -5586,10 +5590,13 @@ static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 			}
 			binder_inner_proc_unlock(ref->proc);
 		}
+		prev = node;
 		binder_node_unlock(node);
 		binder_inner_proc_lock(proc);
 	}
 	binder_inner_proc_unlock(proc);
+	if (prev)
+		binder_put_node(prev);
 }
 
 static int binder_ioctl_freeze(struct binder_freeze_info *info,
2.46.0.792.g87dc391469-goog
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped in order to acquire the node->lock first (lock nesting order). This can race with binder_node_release() and trigger a use-after-free:
==================================================================
BUG: KASAN: slab-use-after-free in _raw_spin_lock+0xe4/0x19c
Write of size 4 at addr ffff53c04c29dd04 by task freeze/640

CPU: 5 UID: 0 PID: 640 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #17
Hardware name: linux,dummy-virt (DT)
Call trace:
 _raw_spin_lock+0xe4/0x19c
 binder_add_freeze_work+0x148/0x478
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

Allocated by task 637:
 __kmalloc_cache_noprof+0x12c/0x27c
 binder_new_node+0x50/0x700
 binder_transaction+0x35ac/0x6f74
 binder_thread_write+0xfb8/0x42a0
 binder_ioctl+0x18f0/0x25ac
 __arm64_sys_ioctl+0x124/0x190

Freed by task 637:
 kfree+0xf0/0x330
 binder_thread_read+0x1e88/0x3a68
 binder_ioctl+0x16d8/0x25ac
 __arm64_sys_ioctl+0x124/0x190
==================================================================
Fix the race by taking a temporary reference on the node before releasing the proc->inner lock. This ensures the node remains alive while in use.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

Reviewed-by: Alice Ryhl <aliceryhl@google.com>
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped to acquire the node->lock first (lock nesting order). This can race with binder_deferred_release() which removes the nodes from the proc->nodes rbtree and adds them into binder_dead_nodes list. This leads to a broken iteration in binder_add_freeze_work() as rb_next() will use data from binder_dead_nodes, triggering an out-of-bounds access:
==================================================================
BUG: KASAN: global-out-of-bounds in rb_next+0xfc/0x124
Read of size 8 at addr ffffcb84285f7170 by task freeze/660

CPU: 8 UID: 0 PID: 660 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #18
Hardware name: linux,dummy-virt (DT)
Call trace:
 rb_next+0xfc/0x124
 binder_add_freeze_work+0x344/0x534
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

The buggy address belongs to the variable:
 binder_dead_nodes+0x10/0x40
[...]
==================================================================
This is possible because proc->nodes (rbtree) and binder_dead_nodes (list) share entries in binder_node through a union:
struct binder_node {
[...]
	union {
		struct rb_node rb_node;
		struct hlist_node dead_node;
	};
Fix the race by checking that the proc is still alive. If not, simply break out of the iteration.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>
---
 drivers/android/binder.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4d90203ea048..8bca2de6fa24 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5593,6 +5593,8 @@ static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 		prev = node;
 		binder_node_unlock(node);
 		binder_inner_proc_lock(proc);
+		if (proc->is_dead)
+			break;
 	}
 	binder_inner_proc_unlock(proc);
 	if (prev)
On Tue, Sep 24, 2024 at 11:44 AM Carlos Llamas cmllamas@google.com wrote:
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped to acquire the node->lock first (lock nesting order). This can race with binder_deferred_release() which removes the nodes from the proc->nodes rbtree and adds them into binder_dead_nodes list. This leads to a broken iteration in binder_add_freeze_work() as rb_next() will use data from binder_dead_nodes, triggering an out-of-bounds access:
==================================================================
BUG: KASAN: global-out-of-bounds in rb_next+0xfc/0x124
Read of size 8 at addr ffffcb84285f7170 by task freeze/660

CPU: 8 UID: 0 PID: 660 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #18
Hardware name: linux,dummy-virt (DT)
Call trace:
 rb_next+0xfc/0x124
 binder_add_freeze_work+0x344/0x534
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

The buggy address belongs to the variable:
 binder_dead_nodes+0x10/0x40
[...]
==================================================================
This is possible because proc->nodes (rbtree) and binder_dead_nodes (list) share entries in binder_node through a union:
struct binder_node {
[...]
	union {
		struct rb_node rb_node;
		struct hlist_node dead_node;
	};
Fix the race by checking that the proc is still alive. If not, simply break out of the iteration.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

Acked-by: Todd Kjos <tkjos@google.com>

 drivers/android/binder.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4d90203ea048..8bca2de6fa24 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5593,6 +5593,8 @@ static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
 		prev = node;
 		binder_node_unlock(node);
 		binder_inner_proc_lock(proc);
+		if (proc->is_dead)
+			break;
 	}
 	binder_inner_proc_unlock(proc);
 	if (prev)
-- 2.46.0.792.g87dc391469-goog
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped to acquire the node->lock first (lock nesting order). This can race with binder_deferred_release() which removes the nodes from the proc->nodes rbtree and adds them into binder_dead_nodes list. This leads to a broken iteration in binder_add_freeze_work() as rb_next() will use data from binder_dead_nodes, triggering an out-of-bounds access:
==================================================================
BUG: KASAN: global-out-of-bounds in rb_next+0xfc/0x124
Read of size 8 at addr ffffcb84285f7170 by task freeze/660

CPU: 8 UID: 0 PID: 660 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #18
Hardware name: linux,dummy-virt (DT)
Call trace:
 rb_next+0xfc/0x124
 binder_add_freeze_work+0x344/0x534
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

The buggy address belongs to the variable:
 binder_dead_nodes+0x10/0x40
[...]
==================================================================
This is possible because proc->nodes (rbtree) and binder_dead_nodes (list) share entries in binder_node through a union:
struct binder_node {
[...]
	union {
		struct rb_node rb_node;
		struct hlist_node dead_node;
	};
Fix the race by checking that the proc is still alive. If not, simply break out of the iteration.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

This change LGTM.

Reviewed-by: Alice Ryhl <aliceryhl@google.com>
I reviewed some other code paths to verify whether there are other problems with processes dying concurrently with operations on freeze notifications. I didn't notice any other memory safety issues, but I noticed that binder_request_freeze_notification returns EINVAL if you try to use it with a node from a dead process. That seems problematic, as this means that there's no way to invoke that command without risking an EINVAL error if the remote process dies. We should not return EINVAL errors on correct usage of the driver.
Alice
On Wed, Sep 25, 2024 at 10:02:51AM +0200, 'Alice Ryhl' via kernel-team wrote:
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped to acquire the node->lock first (lock nesting order). This can race with binder_deferred_release() which removes the nodes from the proc->nodes rbtree and adds them into binder_dead_nodes list. This leads to a broken iteration in binder_add_freeze_work() as rb_next() will use data from binder_dead_nodes, triggering an out-of-bounds access:
==================================================================
BUG: KASAN: global-out-of-bounds in rb_next+0xfc/0x124
Read of size 8 at addr ffffcb84285f7170 by task freeze/660

CPU: 8 UID: 0 PID: 660 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #18
Hardware name: linux,dummy-virt (DT)
Call trace:
 rb_next+0xfc/0x124
 binder_add_freeze_work+0x344/0x534
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

The buggy address belongs to the variable:
 binder_dead_nodes+0x10/0x40
[...]
==================================================================
This is possible because proc->nodes (rbtree) and binder_dead_nodes (list) share entries in binder_node through a union:
struct binder_node {
[...]
	union {
		struct rb_node rb_node;
		struct hlist_node dead_node;
	};
Fix the race by checking that the proc is still alive. If not, simply break out of the iteration.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

This change LGTM.

Reviewed-by: Alice Ryhl <aliceryhl@google.com>
I reviewed some other code paths to verify whether there are other problems with processes dying concurrently with operations on freeze notifications. I didn't notice any other memory safety issues, but I
Yeah most other paths are protected with binder_procs_lock mutex.
noticed that binder_request_freeze_notification returns EINVAL if you try to use it with a node from a dead process. That seems problematic, as this means that there's no way to invoke that command without risking an EINVAL error if the remote process dies. We should not return EINVAL errors on correct usage of the driver.
Agreed, this should probably be -ESRCH or something. I'll add it to v2, thanks for the suggestion.
Cheers, Carlos Llamas
On Wed, Sep 25, 2024 at 7:48 PM Carlos Llamas cmllamas@google.com wrote:
On Wed, Sep 25, 2024 at 10:02:51AM +0200, 'Alice Ryhl' via kernel-team wrote:
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
In binder_add_freeze_work() we iterate over the proc->nodes with the proc->inner_lock held. However, this lock is temporarily dropped to acquire the node->lock first (lock nesting order). This can race with binder_deferred_release() which removes the nodes from the proc->nodes rbtree and adds them into binder_dead_nodes list. This leads to a broken iteration in binder_add_freeze_work() as rb_next() will use data from binder_dead_nodes, triggering an out-of-bounds access:
==================================================================
BUG: KASAN: global-out-of-bounds in rb_next+0xfc/0x124
Read of size 8 at addr ffffcb84285f7170 by task freeze/660

CPU: 8 UID: 0 PID: 660 Comm: freeze Not tainted 6.11.0-07343-ga727812a8d45 #18
Hardware name: linux,dummy-virt (DT)
Call trace:
 rb_next+0xfc/0x124
 binder_add_freeze_work+0x344/0x534
 binder_ioctl+0x1e70/0x25ac
 __arm64_sys_ioctl+0x124/0x190

The buggy address belongs to the variable:
 binder_dead_nodes+0x10/0x40
[...]
==================================================================
This is possible because proc->nodes (rbtree) and binder_dead_nodes (list) share entries in binder_node through a union:
struct binder_node {
[...]
	union {
		struct rb_node rb_node;
		struct hlist_node dead_node;
	};
Fix the race by checking that the proc is still alive. If not, simply break out of the iteration.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

This change LGTM.

Reviewed-by: Alice Ryhl <aliceryhl@google.com>
I reviewed some other code paths to verify whether there are other problems with processes dying concurrently with operations on freeze notifications. I didn't notice any other memory safety issues, but I
Yeah most other paths are protected with binder_procs_lock mutex.
noticed that binder_request_freeze_notification returns EINVAL if you try to use it with a node from a dead process. That seems problematic, as this means that there's no way to invoke that command without risking an EINVAL error if the remote process dies. We should not return EINVAL errors on correct usage of the driver.
Agreed, this should probably be -ESRCH or something. I'll add it to v2, thanks for the suggestion.
Well, maybe? I think it's best to not return errnos from these commands at all, as they obscure how many commands were processed.
Since the node still exists even if the process dies, perhaps we can just let you create the freeze notification even if it's dead? We can make it end up in the same state as if you request a freeze notification and the process then dies afterwards.
Alice
On Wed, Sep 25, 2024 at 07:52:37PM +0200, Alice Ryhl wrote:
I reviewed some other code paths to verify whether there are other problems with processes dying concurrently with operations on freeze notifications. I didn't notice any other memory safety issues, but I
Yeah most other paths are protected with binder_procs_lock mutex.
noticed that binder_request_freeze_notification returns EINVAL if you try to use it with a node from a dead process. That seems problematic, as this means that there's no way to invoke that command without risking an EINVAL error if the remote process dies. We should not return EINVAL errors on correct usage of the driver.
Agreed, this should probably be -ESRCH or something. I'll add it to v2, thanks for the suggestion.
Well, maybe? I think it's best to not return errnos from these commands at all, as they obscure how many commands were processed.
This is problematic, particularly when it's a multi-command buffer. Userspace doesn't really know which one failed and if any of them succeeded. Agreed.
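For illustration, a minimal userspace sketch of such a multi-command write buffer (not taken from any real client: the helper name and the BC_INCREFS stand-in are made up, and it assumes BC_REQUEST_FREEZE_NOTIFICATION takes the same struct binder_handle_cookie payload as the death-notification commands):

/*
 * Two commands packed into one BINDER_WRITE_READ buffer. If the ioctl()
 * fails, errno alone does not say whether the first command was already
 * processed; at best bwr.write_consumed hints at how far the kernel got.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

static int send_two_commands(int binder_fd, uint32_t handle, uint64_t cookie)
{
	struct binder_handle_cookie hc = { .handle = handle, .cookie = cookie };
	struct binder_write_read bwr = { 0 };
	uint8_t buf[32];
	size_t off = 0;
	uint32_t cmd;

	cmd = BC_INCREFS;				/* command 1 */
	memcpy(buf + off, &cmd, sizeof(cmd));
	off += sizeof(cmd);
	memcpy(buf + off, &handle, sizeof(handle));
	off += sizeof(handle);

	cmd = BC_REQUEST_FREEZE_NOTIFICATION;		/* command 2 */
	memcpy(buf + off, &cmd, sizeof(cmd));
	off += sizeof(cmd);
	memcpy(buf + off, &hc, sizeof(hc));
	off += sizeof(hc);

	bwr.write_buffer = (binder_uintptr_t)(uintptr_t)buf;
	bwr.write_size = off;

	return ioctl(binder_fd, BINDER_WRITE_READ, &bwr);
}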
Since the node still exists even if the process dies, perhaps we can just let you create the freeze notification even if it's dead? We can make it end up in the same state as if you request a freeze notification and the process then dies afterwards.
It's a dead node, there is no process associated with it. It would be incorrect to setup the notification as it doesn't have a frozen status anymore. We can't determine the ref->node->proc->is_frozen?
We could silently fail and skip the notification, but I don't know if userspace will attempt to release it later... and fail with EINVAL.
-- Carlos Llamas
On Wed, Sep 25, 2024 at 06:06:30PM +0000, Carlos Llamas wrote:
On Wed, Sep 25, 2024 at 07:52:37PM +0200, Alice Ryhl wrote:
I reviewed some other code paths to verify whether there are other problems with processes dying concurrently with operations on freeze notifications. I didn't notice any other memory safety issues, but I
Yeah most other paths are protected with binder_procs_lock mutex.
noticed that binder_request_freeze_notification returns EINVAL if you try to use it with a node from a dead process. That seems problematic, as this means that there's no way to invoke that command without risking an EINVAL error if the remote process dies. We should not return EINVAL errors on correct usage of the driver.
Agreed, this should probably be -ESRCH or something. I'll add it to v2, thanks for the suggestion.
Well, maybe? I think it's best to not return errnos from these commands at all, as they obscure how many commands were processed.
This is problematic, particularly when it's a multi-command buffer. Userspace doesn't really know which one failed and if any of them succeeded. Agreed.
Since the node still exists even if the process dies, perhaps we can just let you create the freeze notification even if it's dead? We can make it end up in the same state as if you request a freeze notification and the process then dies afterwards.
It's a dead node, there is no process associated with it. It would be incorrect to setup the notification as it doesn't have a frozen status anymore. We can't determine the ref->node->proc->is_frozen?
We could silently fail and skip the notification, but I don't know if userspace will attempt to release it later... and fail with EINVAL.
FWIW, we already propagate errors when the target process or node is dead in some other places. It makes sense to me to let userspace know that we couldn't set up the frozen notification.
On Wed, Sep 25, 2024 at 8:06 PM Carlos Llamas cmllamas@google.com wrote:
On Wed, Sep 25, 2024 at 07:52:37PM +0200, Alice Ryhl wrote:
I reviewed some other code paths to verify whether there are other problems with processes dying concurrently with operations on freeze notifications. I didn't notice any other memory safety issues, but I
Yeah most other paths are protected with binder_procs_lock mutex.
noticed that binder_request_freeze_notification returns EINVAL if you try to use it with a node from a dead process. That seems problematic, as this means that there's no way to invoke that command without risking an EINVAL error if the remote process dies. We should not return EINVAL errors on correct usage of the driver.
Agreed, this should probably be -ESRCH or something. I'll add it to v2, thanks for the suggestion.
Well, maybe? I think it's best to not return errnos from these commands at all, as they obscure how many commands were processed.
This is problematic, particularly when it's a multi-command buffer. Userspace doesn't really know which one failed and if any of them succeeded. Agreed.
Since the node still exists even if the process dies, perhaps we can just let you create the freeze notification even if it's dead? We can make it end up in the same state as if you request a freeze notification and the process then dies afterwards.
It's a dead node, there is no process associated with it. It would be incorrect to setup the notification as it doesn't have a frozen status anymore. We can't determine the ref->node->proc->is_frozen?
We could silently fail and skip the notification, but I don't know if userspace will attempt to release it later... and fail with EINVAL.
I mean, userspace *has* to be able to deal with the case where the process dies *right after* the freeze notification is registered. If we make the behavior where it's already dead be the same as that case, then we're not giving userspace any new things it needs to care about.
Alice
On Thu, Sep 26, 2024 at 10:06:14AM +0200, Alice Ryhl wrote:
On Wed, Sep 25, 2024 at 8:06 PM Carlos Llamas cmllamas@google.com wrote:
On Wed, Sep 25, 2024 at 07:52:37PM +0200, Alice Ryhl wrote:
I reviewed some other code paths to verify whether there are other problems with processes dying concurrently with operations on freeze notifications. I didn't notice any other memory safety issues, but I
Yeah most other paths are protected with binder_procs_lock mutex.
noticed that binder_request_freeze_notification returns EINVAL if you try to use it with a node from a dead process. That seems problematic, as this means that there's no way to invoke that command without risking an EINVAL error if the remote process dies. We should not return EINVAL errors on correct usage of the driver.
Agreed, this should probably be -ESRCH or something. I'll add it to v2, thanks for the suggestion.
Well, maybe? I think it's best to not return errnos from these commands at all, as they obscure how many commands were processed.
This is problematic, particularly when it's a multi-command buffer. Userspace doesn't really know which one failed and if any of them succeeded. Agreed.
Since the node still exists even if the process dies, perhaps we can just let you create the freeze notification even if it's dead? We can make it end up in the same state as if you request a freeze notification and the process then dies afterwards.
It's a dead node, there is no process associated with it. It would be incorrect to setup the notification as it doesn't have a frozen status anymore. We can't determine the ref->node->proc->is_frozen?
We could silently fail and skip the notification, but I don't know if userspace will attempt to release it later... and fail with EINVAL.
I mean, userspace *has* to be able to deal with the case where the process dies *right after* the freeze notification is registered. If we make the behavior where it's already dead be the same as that case, then we're not giving userspace any new things it needs to care about.
This is a fair point. To make it happen though, we would need to modify the behavior of the request a bit. If the node is dead, we could still attach the freeze notification to the reference but then we would skip sending the "current" frozen state. This last bit won't be guaranteed anymore. I _suppose_ this is ok, since as you mention, userspace should have to deal with the process dying anyway.
I honestly don't really like this "fake successful" approach but then we don't handle driver errors very well either. So it might be worth it to avoid propagating this "dead node" error if we can. I'll do this for v2.
Thanks, Carlos Llamas
When a binder reference is cleaned up, any freeze work queued in the associated process should also be removed. Otherwise, the reference is freed while its ref->freeze.work is still queued in proc->work leading to a use-after-free issue as shown by the following KASAN report:
==================================================================
BUG: KASAN: slab-use-after-free in binder_release_work+0x398/0x3d0
Read of size 8 at addr ffff31600ee91488 by task kworker/5:1/211

CPU: 5 UID: 0 PID: 211 Comm: kworker/5:1 Not tainted 6.11.0-rc7-00382-gfc6c92196396 #22
Hardware name: linux,dummy-virt (DT)
Workqueue: events binder_deferred_func
Call trace:
 binder_release_work+0x398/0x3d0
 binder_deferred_func+0xb60/0x109c
 process_one_work+0x51c/0xbd4
 worker_thread+0x608/0xee8

Allocated by task 703:
 __kmalloc_cache_noprof+0x130/0x280
 binder_thread_write+0xdb4/0x42a0
 binder_ioctl+0x18f0/0x25ac
 __arm64_sys_ioctl+0x124/0x190
 invoke_syscall+0x6c/0x254

Freed by task 211:
 kfree+0xc4/0x230
 binder_deferred_func+0xae8/0x109c
 process_one_work+0x51c/0xbd4
 worker_thread+0x608/0xee8
==================================================================
This commit fixes the issue by ensuring any queued freeze work is removed when cleaning up a binder reference.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>
---
 drivers/android/binder.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 8bca2de6fa24..d955135ee37a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1225,6 +1225,12 @@ static void binder_cleanup_ref_olocked(struct binder_ref *ref)
 		binder_dequeue_work(ref->proc, &ref->death->work);
 		binder_stats_deleted(BINDER_STAT_DEATH);
 	}
+
+	if (ref->freeze) {
+		binder_dequeue_work(ref->proc, &ref->freeze->work);
+		binder_stats_deleted(BINDER_STAT_FREEZE);
+	}
+
 	binder_stats_deleted(BINDER_STAT_REF);
 }
On Tue, Sep 24, 2024 at 11:44 AM Carlos Llamas cmllamas@google.com wrote:
When a binder reference is cleaned up, any freeze work queued in the associated process should also be removed. Otherwise, the reference is freed while its ref->freeze.work is still queued in proc->work leading to a use-after-free issue as shown by the following KASAN report:
==================================================================
BUG: KASAN: slab-use-after-free in binder_release_work+0x398/0x3d0
Read of size 8 at addr ffff31600ee91488 by task kworker/5:1/211

CPU: 5 UID: 0 PID: 211 Comm: kworker/5:1 Not tainted 6.11.0-rc7-00382-gfc6c92196396 #22
Hardware name: linux,dummy-virt (DT)
Workqueue: events binder_deferred_func
Call trace:
 binder_release_work+0x398/0x3d0
 binder_deferred_func+0xb60/0x109c
 process_one_work+0x51c/0xbd4
 worker_thread+0x608/0xee8

Allocated by task 703:
 __kmalloc_cache_noprof+0x130/0x280
 binder_thread_write+0xdb4/0x42a0
 binder_ioctl+0x18f0/0x25ac
 __arm64_sys_ioctl+0x124/0x190
 invoke_syscall+0x6c/0x254

Freed by task 211:
 kfree+0xc4/0x230
 binder_deferred_func+0xae8/0x109c
 process_one_work+0x51c/0xbd4
 worker_thread+0x608/0xee8
==================================================================
This commit fixes the issue by ensuring any queued freeze work is removed when cleaning up a binder reference.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

Acked-by: Todd Kjos <tkjos@android.com>

 drivers/android/binder.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 8bca2de6fa24..d955135ee37a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1225,6 +1225,12 @@ static void binder_cleanup_ref_olocked(struct binder_ref *ref)
 		binder_dequeue_work(ref->proc, &ref->death->work);
 		binder_stats_deleted(BINDER_STAT_DEATH);
 	}
+
+	if (ref->freeze) {
+		binder_dequeue_work(ref->proc, &ref->freeze->work);
+		binder_stats_deleted(BINDER_STAT_FREEZE);
+	}
+
 	binder_stats_deleted(BINDER_STAT_REF);
 }
-- 2.46.0.792.g87dc391469-goog
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
When a binder reference is cleaned up, any freeze work queued in the associated process should also be removed. Otherwise, the reference is freed while its ref->freeze.work is still queued in proc->work leading to a use-after-free issue as shown by the following KASAN report:
==================================================================
BUG: KASAN: slab-use-after-free in binder_release_work+0x398/0x3d0
Read of size 8 at addr ffff31600ee91488 by task kworker/5:1/211

CPU: 5 UID: 0 PID: 211 Comm: kworker/5:1 Not tainted 6.11.0-rc7-00382-gfc6c92196396 #22
Hardware name: linux,dummy-virt (DT)
Workqueue: events binder_deferred_func
Call trace:
 binder_release_work+0x398/0x3d0
 binder_deferred_func+0xb60/0x109c
 process_one_work+0x51c/0xbd4
 worker_thread+0x608/0xee8

Allocated by task 703:
 __kmalloc_cache_noprof+0x130/0x280
 binder_thread_write+0xdb4/0x42a0
 binder_ioctl+0x18f0/0x25ac
 __arm64_sys_ioctl+0x124/0x190
 invoke_syscall+0x6c/0x254

Freed by task 211:
 kfree+0xc4/0x230
 binder_deferred_func+0xae8/0x109c
 process_one_work+0x51c/0xbd4
 worker_thread+0x608/0xee8
==================================================================
This commit fixes the issue by ensuring any queued freeze work is removed when cleaning up a binder reference.
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

Reviewed-by: Alice Ryhl <aliceryhl@google.com>
The BINDER_WORK_FROZEN_BINDER type is not handled in the binder_logs entries and it shows up as "unknown work" when logged:
proc 649
context binder-test
  thread 649: l 00 need_return 0 tr 0
  ref 13: desc 1 node 8 s 1 w 0 d 0000000053c4c0c3
  unknown work: type 10

This patch adds the freeze work type, which is now logged as such:

proc 637
context binder-test
  thread 637: l 00 need_return 0 tr 0
  ref 8: desc 1 node 3 s 1 w 0 d 00000000dc39e9c6
  has frozen binder
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>
---
 drivers/android/binder.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d955135ee37a..2be9f3559ed7 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6408,6 +6408,9 @@ static void print_binder_work_ilocked(struct seq_file *m,
 	case BINDER_WORK_CLEAR_DEATH_NOTIFICATION:
 		seq_printf(m, "%shas cleared death notification\n", prefix);
 		break;
+	case BINDER_WORK_FROZEN_BINDER:
+		seq_printf(m, "%shas frozen binder\n", prefix);
+		break;
 	default:
 		seq_printf(m, "%sunknown work: type %d\n", prefix, w->type);
 		break;
On Tue, Sep 24, 2024 at 11:44 AM Carlos Llamas cmllamas@google.com wrote:
The BINDER_WORK_FROZEN_BINDER type is not handled in the binder_logs entries and it shows up as "unknown work" when logged:
proc 649
context binder-test
  thread 649: l 00 need_return 0 tr 0
  ref 13: desc 1 node 8 s 1 w 0 d 0000000053c4c0c3
  unknown work: type 10

This patch adds the freeze work type, which is now logged as such:

proc 637
context binder-test
  thread 637: l 00 need_return 0 tr 0
  ref 8: desc 1 node 3 s 1 w 0 d 00000000dc39e9c6
  has frozen binder
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

Acked-by: Todd Kjos <tkjos@google.com>

 drivers/android/binder.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d955135ee37a..2be9f3559ed7 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6408,6 +6408,9 @@ static void print_binder_work_ilocked(struct seq_file *m,
 	case BINDER_WORK_CLEAR_DEATH_NOTIFICATION:
 		seq_printf(m, "%shas cleared death notification\n", prefix);
 		break;
+	case BINDER_WORK_FROZEN_BINDER:
+		seq_printf(m, "%shas frozen binder\n", prefix);
+		break;
 	default:
 		seq_printf(m, "%sunknown work: type %d\n", prefix, w->type);
 		break;
-- 2.46.0.792.g87dc391469-goog
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
The BINDER_WORK_FROZEN_BINDER type is not handled in the binder_logs entries and it shows up as "unknown work" when logged:
proc 649
context binder-test
  thread 649: l 00 need_return 0 tr 0
  ref 13: desc 1 node 8 s 1 w 0 d 0000000053c4c0c3
  unknown work: type 10

This patch adds the freeze work type, which is now logged as such:

proc 637
context binder-test
  thread 637: l 00 need_return 0 tr 0
  ref 8: desc 1 node 3 s 1 w 0 d 00000000dc39e9c6
  has frozen binder
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

 drivers/android/binder.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d955135ee37a..2be9f3559ed7 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6408,6 +6408,9 @@ static void print_binder_work_ilocked(struct seq_file *m,
 	case BINDER_WORK_CLEAR_DEATH_NOTIFICATION:
 		seq_printf(m, "%shas cleared death notification\n", prefix);
 		break;
+	case BINDER_WORK_FROZEN_BINDER:
+		seq_printf(m, "%shas frozen binder\n", prefix);
+		break;
What about BINDER_WORK_CLEAR_FREEZE_NOTIFICATION?
Alice
On Wed, Sep 25, 2024 at 09:36:10AM +0200, 'Alice Ryhl' via kernel-team wrote:
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
The BINDER_WORK_FROZEN_BINDER type is not handled in the binder_logs entries and it shows up as "unknown work" when logged:
proc 649
context binder-test
  thread 649: l 00 need_return 0 tr 0
  ref 13: desc 1 node 8 s 1 w 0 d 0000000053c4c0c3
  unknown work: type 10

This patch adds the freeze work type, which is now logged as such:

proc 637
context binder-test
  thread 637: l 00 need_return 0 tr 0
  ref 8: desc 1 node 3 s 1 w 0 d 00000000dc39e9c6
  has frozen binder
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

 drivers/android/binder.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d955135ee37a..2be9f3559ed7 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6408,6 +6408,9 @@ static void print_binder_work_ilocked(struct seq_file *m,
 	case BINDER_WORK_CLEAR_DEATH_NOTIFICATION:
 		seq_printf(m, "%shas cleared death notification\n", prefix);
 		break;
+	case BINDER_WORK_FROZEN_BINDER:
+		seq_printf(m, "%shas frozen binder\n", prefix);
+		break;
What about BINDER_WORK_CLEAR_FREEZE_NOTIFICATION?
Oh, you are right! We also need this type here. I haven't played with the clear notification path just yet (as you can tell). Thanks for pointing it out though, I'll send a v2.
Looking closer, I see that BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT is also missing, so I'll send a separate patch for that too.
Thanks,
--
Carlos Llamas
On Wed, Sep 25, 2024 at 05:25:42PM +0000, Carlos Llamas wrote:
On Wed, Sep 25, 2024 at 09:36:10AM +0200, 'Alice Ryhl' via kernel-team wrote:
On Tue, Sep 24, 2024 at 8:44 PM Carlos Llamas cmllamas@google.com wrote:
The BINDER_WORK_FROZEN_BINDER type is not handled in the binder_logs entries and it shows up as "unknown work" when logged:
proc 649
context binder-test
  thread 649: l 00 need_return 0 tr 0
  ref 13: desc 1 node 8 s 1 w 0 d 0000000053c4c0c3
  unknown work: type 10

This patch adds the freeze work type, which is now logged as such:

proc 637
context binder-test
  thread 637: l 00 need_return 0 tr 0
  ref 8: desc 1 node 3 s 1 w 0 d 00000000dc39e9c6
  has frozen binder
Fixes: d579b04a52a1 ("binder: frozen notification")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas@google.com>

 drivers/android/binder.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d955135ee37a..2be9f3559ed7 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6408,6 +6408,9 @@ static void print_binder_work_ilocked(struct seq_file *m,
 	case BINDER_WORK_CLEAR_DEATH_NOTIFICATION:
 		seq_printf(m, "%shas cleared death notification\n", prefix);
 		break;
+	case BINDER_WORK_FROZEN_BINDER:
+		seq_printf(m, "%shas frozen binder\n", prefix);
+		break;
What about BINDER_WORK_CLEAR_FREEZE_NOTIFICATION?
Oh, you are right! We also need this type here. I haven't played with the clear notification path just yet (as you can tell). Thanks for pointing it out though, I'll send a v2.
Looking closer, I see that BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT is also missing, so I'll send a separate patch for that too.
After playing with clear-freeze I've also found a memleak. This work is not being released unless it is processed. I'll add one more patch to the v2 series fixing it (tomorrow).
unreferenced object 0xffff03ced63cd280 (size 64):
  comm "binder-util", pid 947, jiffies 4295183379
  hex dump (first 32 bytes):
    80 d2 3c d6 ce 03 ff ff 80 d2 3c d6 ce 03 ff ff  ..<.......<.....
    0b 00 00 00 00 00 00 00 3c 1f 4a 00 00 00 00 00  ........<.J.....
  backtrace (crc 718d20df):
    [<00000000188a0477>] kmemleak_alloc+0x34/0x40
    [<00000000fe1c45bc>] __kmalloc_cache_noprof+0x208/0x280
    [<000000005ebe1c53>] binder_thread_write+0xdec/0x439c
    [<000000004b0e4ffa>] binder_ioctl+0x1b68/0x22cc
    [<000000001786d65d>] __arm64_sys_ioctl+0x124/0x190
    [<00000000fb1e34f9>] invoke_syscall+0x6c/0x254
    [<00000000826a09b6>] el0_svc_common.constprop.0+0xac/0x230
    [<000000004389c74d>] do_el0_svc+0x40/0x58
    [<000000008b9dd949>] el0_svc+0x38/0x78
    [<00000000c66f77b2>] el0t_64_sync_handler+0x120/0x12c
    [<00000000a4cd389b>] el0t_64_sync+0x190/0x194
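One possible shape for such a fix, sketched only from names that already appear in the patches above (binder_release_work() switching on w->type, the work item embedded in struct binder_ref_freeze, BINDER_STAT_FREEZE); the actual v2 patch may differ:

	case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: {
		struct binder_ref_freeze *freeze;

		/*
		 * The clear-freeze work is embedded in the binder_ref_freeze
		 * allocated by binder_thread_write(). If the work is never
		 * delivered, free it here instead of leaking it.
		 */
		freeze = container_of(w, struct binder_ref_freeze, work);
		kfree(freeze);
		binder_stats_deleted(BINDER_STAT_FREEZE);
	} break;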
-- Carlos Llamas