With the introduction of nested translation, some page tables are managed by userspace. A hw_pagetable can be a stage-1 page table, a stage-2 page table, or just a standalone page table.
Stage-2 page tables and standalone page tables are kernel-managed for security. iommufd already supports them.
A stage-1 page table is user-managed and needs to work with a stage-2 page table. Hence, userspace should provide a hw_pagetable ID that points to a stage-2 hw_pagetable. Because the stage-1 page table is user-managed, an ioctl (IOMMU_HWPT_INVALIDATE) is added to sync the IOTLB whenever the stage-1 page table is modified.
The first available user-managed hw_pagetable type is the Intel VT-d stage-1 page table for nested translation.
Signed-off-by: Nicolin Chen nicolinc@nvidia.com Signed-off-by: Yi Liu yi.l.liu@intel.com --- drivers/iommu/iommufd/device.c | 3 +- drivers/iommu/iommufd/hw_pagetable.c | 71 ++++++++++++++++++++++++- drivers/iommu/iommufd/iommufd_private.h | 1 + drivers/iommu/iommufd/main.c | 8 +++ include/uapi/linux/iommufd.h | 34 ++++++++++++ 5 files changed, 114 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 6d948fa418d5..c19e2f54a44f 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -164,7 +164,8 @@ struct device *iommufd_obj_dev(struct iommufd_object *obj) * indexed by the members defined in enum iommu_device_data_type. */ const u64 iommufd_supported_pgtbl_types[] = { - [IOMMU_DEVICE_DATA_INTEL_VTD] = BIT_ULL(IOMMU_PGTBL_DATA_NONE), + [IOMMU_DEVICE_DATA_INTEL_VTD] = BIT_ULL(IOMMU_PGTBL_DATA_NONE) | + BIT_ULL(IOMMU_PGTBL_DATA_VTD_S1), };
int iommufd_device_get_info(struct iommufd_ucmd *ucmd) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 02dee8e8d958..44a75ccc8e08 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -108,11 +108,12 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, */ static const size_t iommufd_hwpt_info_size[] = { [IOMMU_PGTBL_DATA_NONE] = 0, + [IOMMU_PGTBL_DATA_VTD_S1] = sizeof(struct iommu_hwpt_intel_vtd), };
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) { - struct iommufd_hw_pagetable *hwpt; + struct iommufd_hw_pagetable *hwpt, *parent = NULL; struct iommu_hwpt_alloc *cmd = ucmd->cmd; struct iommufd_ctx *ictx = ucmd->ictx; struct iommufd_object *pt_obj = NULL; @@ -160,6 +161,19 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) }
switch (pt_obj->type) { + case IOMMUFD_OBJ_HW_PAGETABLE: + parent = container_of(pt_obj, struct iommufd_hw_pagetable, obj); + /* + * Cannot allocate user-managed hwpt linking to auto_created + * hwpt. If the parent hwpt is already a user-managed hwpt, + * don't allocate another user-managed hwpt linking to it. + */ + if (parent->auto_domain || parent->parent) { + rc = -EINVAL; + goto out_put_pt; + } + ioas = parent->ioas; + break; case IOMMUFD_OBJ_IOAS: ioas = container_of(pt_obj, struct iommufd_ioas, obj); break; @@ -189,7 +203,7 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) }
mutex_lock(&ioas->mutex); - hwpt = __iommufd_hw_pagetable_alloc(ictx, ioas, dev, NULL, data); + hwpt = __iommufd_hw_pagetable_alloc(ictx, ioas, dev, parent, data); mutex_unlock(&ioas->mutex); if (IS_ERR(hwpt)) { rc = PTR_ERR(hwpt); @@ -217,3 +231,56 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) iommufd_put_object(dev_obj); return rc; } + +static u32 iommufd_hwpt_invalidate_info_size[] = { + [IOMMU_PGTBL_DATA_VTD_S1] = sizeof(struct iommu_hwpt_invalidate_intel_vtd), +}; + +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + u64 user_ptr; + u32 user_data_len, klen; + int rc = 0; + + /* + * No invalidation needed for type==IOMMU_PGTBL_DATA_NONE. + * data_len should not exceed the size of iommufd_invalidate_buffer. + */ + if (cmd->data_type == IOMMU_PGTBL_DATA_NONE || !cmd->data_len) + return -EOPNOTSUPP; + + hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + /* Do not allow any kernel-managed hw_pagetable */ + if (!hwpt->parent) { + rc = -EINVAL; + goto out_put_hwpt; + } + + klen = iommufd_hwpt_invalidate_info_size[cmd->data_type]; + if (!klen) { + rc = -EINVAL; + goto out_put_hwpt; + } + + /* + * copy the needed fields before reusing the ucmd buffer, this + * avoids memory allocation in this path. + */ + user_ptr = cmd->data_uptr; + user_data_len = cmd->data_len; + + rc = copy_struct_from_user(cmd, klen, + u64_to_user_ptr(user_ptr), user_data_len); + if (rc) + goto out_put_hwpt; + + hwpt->domain->ops->iotlb_sync_user(hwpt->domain, cmd); +out_put_hwpt: + iommufd_put_object(&hwpt->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 5ef034451f4b..bb341e633c18 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -267,6 +267,7 @@ iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) }
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
struct device *iommufd_obj_dev(struct iommufd_object *obj);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 831303d64abe..6e2d8805daf3 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -252,6 +252,12 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_device_info info; struct iommu_hwpt_alloc hwpt; + struct iommu_hwpt_invalidate cache; + /* + * data_type specific structure used in the cache invalidation + * path. + */ + struct iommu_hwpt_invalidate_intel_vtd vtd; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_copy ioas_copy; @@ -287,6 +293,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), + IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate, + struct iommu_hwpt_invalidate, data_uptr), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index f501add5ffe9..cb6a9ee215f4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -47,6 +47,7 @@ enum { IOMMUFD_CMD_VFIO_IOAS, IOMMUFD_CMD_DEVICE_GET_INFO, IOMMUFD_CMD_HWPT_ALLOC, + IOMMUFD_CMD_HWPT_INVALIDATE, };
/** @@ -377,9 +378,11 @@ struct iommu_device_info_vtd { /** * enum iommu_pgtbl_data_type - IOMMU Page Table User Data type * @IOMMU_PGTBL_DATA_NONE: no user data + * @IOMMU_PGTBL_DATA_VTD_S1: Data for Intel VT-d stage-1 page table */ enum iommu_pgtbl_data_type { IOMMU_PGTBL_DATA_NONE, + IOMMU_PGTBL_DATA_VTD_S1, };
/** @@ -495,6 +498,8 @@ struct iommu_hwpt_intel_vtd { * +------------------------------+-------------------------------------+ * | IOMMU_PGTBL_DATA_NONE | N/A | * +------------------------------+-------------------------------------+ + * | IOMMU_PGTBL_DATA_VTD_S1 | struct iommu_hwpt_intel_vtd | + * +------------------------------+-------------------------------------+ */ struct iommu_hwpt_alloc { __u32 size; @@ -562,4 +567,33 @@ struct iommu_hwpt_invalidate_intel_vtd { __u64 granule_size; __u64 nb_granules; }; + +/** + * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) + * @size: sizeof(struct iommu_hwpt_invalidate) + * @hwpt_id: HWPT ID of target hardware page table for the invalidation + * @data_type: One of enum iommu_pgtbl_data_type + * @data_len: Length of the type specific data + * @data_uptr: User pointer to the type specific data + * + * Invalidate the iommu cache for user-managed page table. Modifications + * on user-managed page table should be followed with this operation to + * sync the userspace with the kernel and underlying hardware. This operation + * is only needed by user-managed hw_pagetables, so the @data_type should + * never be IOMMU_PGTBL_DATA_NONE. + * + * +==============================+========================================+ + * | @data_type | Data structure in @data_uptr | + * +------------------------------+----------------------------------------+ + * | IOMMU_PGTBL_DATA_VTD_S1 | struct iommu_hwpt_invalidate_intel_vtd | + * +------------------------------+----------------------------------------+ + */ +struct iommu_hwpt_invalidate { + __u32 size; + __u32 hwpt_id; + __u32 data_type; + __u32 data_len; + __aligned_u64 data_uptr; +}; +#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) #endif