On Wed, 2025-11-19 at 20:30 +0000, Matthew Auld wrote:
From: Matthew Brost <matthew.brost@intel.com>
Currently this is very broken if someone attempts to create a bind queue and share it across multiple VMs. For example, we currently assume it is safe to acquire the user VM lock to protect some of the bind queue state, but if we allow sharing the bind queue across multiple VMs then this quickly breaks down.
To fix this, reject using a bind queue with any VM other than the one originally passed when creating the bind queue. This is a uAPI change, however it was more of an oversight on the kernel side that we didn't reject this, and the expectation is that userspace shouldn't be using bind queues in this way, so in theory this change should go unnoticed.
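To make the userspace-visible change concrete, here is a minimal hypothetical sketch (not part of the patch; uAPI names taken from xe_drm.h, error handling elided) of the pattern that is now rejected:

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/xe_drm.h>

  /* Create a bind queue against VM A, then try to use it on VM B. */
  static void cross_vm_bind(int fd, uint32_t vm_a, uint32_t vm_b)
  {
  	struct drm_xe_engine_class_instance inst = {
  		.engine_class = DRM_XE_ENGINE_CLASS_VM_BIND,
  	};
  	struct drm_xe_exec_queue_create qc = {
  		.width = 1,
  		.num_placements = 1,
  		.vm_id = vm_a,		/* bind queue created against VM A */
  		.instances = (uintptr_t)&inst,
  	};
  	struct drm_xe_vm_bind bind = {0};

  	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &qc);

  	bind.vm_id = vm_b;		/* ...but used against VM B */
  	bind.exec_queue_id = qc.exec_queue_id;
  	/* Bind op setup elided; with this patch the VM mismatch alone
  	 * makes this ioctl fail with -EINVAL instead of being accepted. */
  	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
  }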
Mesa only uses a bind queue to bind BOs within the vm_id that was set at bind queue creation, so this is:

Acked-by: José Roberto de Souza <jose.souza@intel.com>
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Reported-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Co-developed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Michal Mrozek <michal.mrozek@intel.com>
Cc: Jose Souza <jose.souza@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: stable@vger.kernel.org # v6.8+
---
 drivers/gpu/drm/xe/xe_exec_queue.c       | 27 +++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_exec_queue.h       |  1 +
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  6 ++++++
 drivers/gpu/drm/xe/xe_sriov_vf_ccs.c     |  2 +-
 drivers/gpu/drm/xe/xe_vm.c               |  7 +++++-
 5 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 8724f8de67e2..31bb051cbb78 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -328,6 +328,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
  * @xe: Xe device.
  * @tile: tile which bind exec queue belongs to.
  * @flags: exec queue creation flags
+ * @user_vm: The user VM which this exec queue belongs to
  * @extensions: exec queue creation extensions
  *
  * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
@@ -341,6 +342,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
  */
 struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
 						struct xe_tile *tile,
+						struct xe_vm *user_vm,
 						u32 flags, u64 extensions)
 {
 	struct xe_gt *gt = tile->primary_gt;
@@ -379,6 +381,9 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
 		}
 	}
 
+	if (user_vm)
+		q->user_vm = xe_vm_get(user_vm);
+
 	return q;
 }
 ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
@@ -407,6 +412,8 @@ void xe_exec_queue_destroy(struct kref *ref)
 			xe_exec_queue_put(eq);
 	}
 
+	xe_vm_put(q->user_vm);
+
 	q->ops->destroy(q);
 }
 
@@ -742,6 +749,23 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
 			return -EINVAL;
 
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_DBG(xe, !vm))
+			return -ENOENT;
+
+		err = down_read_interruptible(&vm->lock);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+
+		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+			up_read(&vm->lock);
+			xe_vm_put(vm);
+			return -ENOENT;
+		}
+		up_read(&vm->lock);
+
 		for_each_tile(tile, xe, id) {
 			struct xe_exec_queue *new;
 
@@ -749,7 +773,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 			if (id)
 				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
 
-			new = xe_exec_queue_create_bind(xe, tile, flags,
+			new = xe_exec_queue_create_bind(xe, tile, vm, flags,
 							args->extensions);
 			if (IS_ERR(new)) {
 				err = PTR_ERR(new);
@@ -763,6 +787,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 				list_add_tail(&new->multi_gt_list,
 					      &q->multi_gt_link);
 		}
+		xe_vm_put(vm);
 	} else {
 		logical_mask = calc_validate_logical_mask(xe, eci,
 							  args->width,
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index fda4d4f9bda8..37a9da22f420 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -28,6 +28,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
 						 u32 flags, u64 extensions);
 struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
 						struct xe_tile *tile,
+						struct xe_vm *user_vm,
 						u32 flags, u64 extensions);
 void xe_exec_queue_fini(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 771ffe35cd0c..3a4263c92b3d 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -54,6 +54,12 @@ struct xe_exec_queue {
 	struct kref refcount;
 	/** @vm: VM (address space) for this exec queue */
 	struct xe_vm *vm;
+	/**
+	 * @user_vm: User VM (address space) for this exec queue (bind queues
+	 * only)
+	 */
+	struct xe_vm *user_vm;
+
 	/** @class: class of this exec queue */
 	enum xe_engine_class class;
 	/**
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index 33f4238604e1..f7b7c44cf2f6 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -350,7 +350,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
 		flags = EXEC_QUEUE_FLAG_KERNEL |
 			EXEC_QUEUE_FLAG_PERMANENT |
 			EXEC_QUEUE_FLAG_MIGRATE;
-		q = xe_exec_queue_create_bind(xe, tile, flags, 0);
+		q = xe_exec_queue_create_bind(xe, tile, NULL, flags, 0);
 		if (IS_ERR(q)) {
 			err = PTR_ERR(q);
 			goto err_ret;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index f9989a7a710c..7973d654540a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1614,7 +1614,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
 			if (!vm->pt_root[id])
 				continue;
 
-			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
+			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
 			if (IS_ERR(q)) {
 				err = PTR_ERR(q);
 				goto err_close;
@@ -3571,6 +3571,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}
 
+	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
+		err = -EINVAL;
+		goto put_exec_queue;
+	}
+
 	/* Ensure all UNMAPs visible */
 	xe_svm_flush(vm);
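As I read it, the resulting ownership/lifetime flow is (my own summary of the diff above, not code from the series):

  /* xe_exec_queue_create_bind(xe, tile, user_vm, ...):
   *	q->user_vm = xe_vm_get(user_vm);   // queue pins the creating VM
   *
   * xe_vm_bind_ioctl(...):
   *	if (q && vm != q->user_vm) {       // any other VM is rejected
   *		err = -EINVAL;
   *		goto put_exec_queue;
   *	}
   *
   * xe_exec_queue_destroy(...):
   *	xe_vm_put(q->user_vm);             // reference dropped with the
   *					   // queue; user_vm stays NULL for
   *					   // kernel-internal bind queues
   */

Since the queue holds a reference on user_vm for its whole lifetime, the plain pointer comparison in xe_vm_bind_ioctl can't be confused by the VM being freed and its memory reused while the queue is still alive.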