 
            On 17.09.25 22:01, Jason Gunthorpe wrote:
fput() doesn't actually call file_operations release() synchronously, it puts the file on a work queue and it will be released eventually.
This is normally fine, except for iommufd the file and the iommufd_object are tied to gether. The file has the object as it's private_data and holds a users refcount, while the object is expected to remain alive as long as the file is.
When the allocation of a new object aborts before installing the file it will fput() the file and then go on to immediately kfree() the obj. This causes a UAF once the workqueue completes the fput() and tries to decrement the users refcount.
Fix this by putting the core code in charge of the file lifetime, and call __fput_sync() during abort to ensure that release() is called before kfree.
Thanks for the detailed description. I had looked into this but couldn't find a straightforward solution.
Reviewed-by: Nirmoy Das nirmoyd@nvidia.com
__fput_sync() is a bit too tricky to open code in all the object implementations. Instead the objects tell the core code where the file pointer is and the core will take care of the life cycle.
If the object is successfully allocated then the file will hold a users refcount and the iommufd_object cannot be destroyed.
It is worth noting that close(); ioctl(IOMMU_DESTROY); doesn't have an issue because close() is already using a synchronous version of fput().
The UAF looks like this:
BUG: KASAN: slab-use-after-free in iommufd_eventq_fops_release+0x45/0xc0 drivers/iommu/iommufd/eventq.c:376 Write of size 4 at addr ffff888059c97804 by task syz.0.46/6164 CPU: 0 UID: 0 PID: 6164 Comm: syz.0.46 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 Call Trace: <TASK> __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xcd/0x630 mm/kasan/report.c:482 kasan_report+0xe0/0x110 mm/kasan/report.c:595 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x100/0x1b0 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:96 [inline] atomic_fetch_sub_release include/linux/atomic/atomic-instrumented.h:400 [inline] __refcount_dec include/linux/refcount.h:455 [inline] refcount_dec include/linux/refcount.h:476 [inline] iommufd_eventq_fops_release+0x45/0xc0 drivers/iommu/iommufd/eventq.c:376 __fput+0x402/0xb70 fs/file_table.c:468 task_work_run+0x14d/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xeb/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x41c/0x4c0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7fCc: stable@vger.kernel.org Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object") Reported-by: syzbot+80620e2d0d0a33b09f93@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/68c8583d.050a0220.2ff435.03a2.GAE@google.com Signed-off-by: Jason Gunthorpe jgg@nvidia.coz
drivers/iommu/iommufd/eventq.c | 9 ++------- drivers/iommu/iommufd/main.c | 35 +++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c index fc4de63b0bce64..e23d9ee4fe3806 100644 --- a/drivers/iommu/iommufd/eventq.c +++ b/drivers/iommu/iommufd/eventq.c @@ -393,12 +393,12 @@ static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name, const struct file_operations *fops) { struct file *filep;
- int fdno;
spin_lock_init(&eventq->lock); INIT_LIST_HEAD(&eventq->deliver); init_waitqueue_head(&eventq->wait_queue);
- /* The filep is fput() by the core code during failure */ filep = anon_inode_getfile(name, fops, eventq, O_RDWR); if (IS_ERR(filep)) return PTR_ERR(filep);
@@ -408,10 +408,7 @@ static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name, eventq->filep = filep; refcount_inc(&eventq->obj.users);
- fdno = get_unused_fd_flags(O_CLOEXEC);
- if (fdno < 0)
fput(filep);- return fdno;
- return get_unused_fd_flags(O_CLOEXEC); }
static const struct file_operations iommufd_fault_fops = @@ -452,7 +449,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) return 0; out_put_fdno: put_unused_fd(fdno);
- fput(fault->common.filep); return rc; }
@@ -536,7 +532,6 @@ int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd) out_put_fdno: put_unused_fd(fdno);
- fput(veventq->common.filep); out_abort: iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj); out_unlock_veventqs:
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 109de747e8b3ed..b8475194279a9a 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -23,6 +23,7 @@ #include "iommufd_test.h" struct iommufd_object_ops {
- size_t file_offset; void (*pre_destroy)(struct iommufd_object *obj); void (*destroy)(struct iommufd_object *obj); void (*abort)(struct iommufd_object *obj);
@@ -131,10 +132,30 @@ void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj) void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj) {
- if (iommufd_object_ops[obj->type].abort)
iommufd_object_ops[obj->type].abort(obj);
- const struct iommufd_object_ops *ops = &iommufd_object_ops[obj->type];
- if (ops->file_offset) {
struct file **filep = ((void *)obj) + ops->file_offset;
/*
* files should hold a users refcount while the file is open and
* put it back in their release. They should hold a pointer to
* obj in their private data. Normal fput() is deferred to a
* workqueue and can get out of order with the following
* kfree(obj). Using the sync version ensures the release
* happens immediately. During abort we require the file
* refcount is one at this point - meaning the object alloc
* function cannot do anything to allow another thread to take a
* refcount prior to a guaranteed success.
*/
if (*filep)
__fput_sync(*filep);- }
- if (ops->abort)
else
ops->abort(obj);
iommufd_object_ops[obj->type].destroy(obj);
iommufd_object_abort(ictx, obj); }
ops->destroy(obj);@@ -659,6 +680,12 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx) } EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, "IOMMUFD"); +#define IOMMUFD_FILE_OFFSET(_struct, _filep, _obj) \
- .file_offset = (offsetof(_struct, _filep) + \
BUILD_BUG_ON_ZERO(!__same_type( \
struct file *, ((_struct *)NULL)->_filep)) + \
BUILD_BUG_ON_ZERO(offsetof(_struct, _obj)))- static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_ACCESS] = { .destroy = iommufd_access_destroy_object,
@@ -669,6 +696,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { }, [IOMMUFD_OBJ_FAULT] = { .destroy = iommufd_fault_destroy,
}, [IOMMUFD_OBJ_HW_QUEUE] = { .destroy = iommufd_hw_queue_destroy,
IOMMUFD_FILE_OFFSET(struct iommufd_fault, common.filep, common.obj),@@ -691,6 +719,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_VEVENTQ] = { .destroy = iommufd_veventq_destroy, .abort = iommufd_veventq_abort,
}, [IOMMUFD_OBJ_VIOMMU] = { .destroy = iommufd_viommu_destroy,
IOMMUFD_FILE_OFFSET(struct iommufd_veventq, common.filep, common.obj),