From: Nicolin Chen nicolinc@nvidia.com Sent: Saturday, January 11, 2025 11:32 AM
@@ -294,7 +294,9 @@ struct iommu_ioas_unmap {
/**
- enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
ioctl(IOMMU_OPTION_HUGE_PAGES)
ioctl(IOMMU_OPTION_HUGE_PAGES) and
ioctl(IOMMU_OPTION_SW_MSI_START) and
ioctl(IOMMU_OPTION_SW_MSI_SIZE)
- @IOMMU_OPTION_RLIMIT_MODE:
- Change how RLIMIT_MEMLOCK accounting works. The caller must have
privilege
- to invoke this. Value 0 (default) is user based accounting, 1 uses process
@@ -304,10 +306,24 @@ struct iommu_ioas_unmap {
- iommu mappings. Value 0 disables combining, everything is mapped to
- PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
- option, the object_id must be the IOAS ID.
- @IOMMU_OPTION_SW_MSI_START:
- Change the base address of the IOMMU mapping region for MSI
doorbell(s).
- It must be set this before attaching a device to an IOAS/HWPT,
remove 'this'
otherwise
- this option will be not effective on that IOAS/HWPT. User can
Do we want to explicitly check this instead of leaving it no effect silently?
choose to
- let kernel pick a base address, by simply ignoring this option or setting
- a value 0 to IOMMU_OPTION_SW_MSI_SIZE. Global option, object_id
must be 0
- @IOMMU_OPTION_SW_MSI_SIZE:
- Change the size of the IOMMU mapping region for MSI doorbell(s). It
must
- be set this before attaching a device to an IOAS/HWPT, otherwise it
won't
- be effective on that IOAS/HWPT. The value is in MB, and the minimum
value
- is 1 MB. A value 0 (default) will invalidate the MSI doorbell base address
- value set to IOMMU_OPTION_SW_MSI_START. Global option, object_id
must be 0
hmm there is no check on the minimal value and enable the effect of value 0 in this patch.
iommufd_device_attach_reserved_iova(struct iommufd_device *idev, struct iommufd_hwpt_paging *hwpt_paging) {
struct iommufd_ctx *ictx = idev->ictx; int rc;
lockdep_assert_held(&idev->igroup->lock);
/* Override it with a user-programmed SW_MSI region */
if (ictx->sw_msi_size && ictx->sw_msi_start != PHYS_ADDR_MAX)
idev->igroup->sw_msi_start = ictx->sw_msi_start;
rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, idev->dev, &idev->igroup-
sw_msi_start);
what about moving above additions into iopt_table_enforce_dev_resv_regions() which is all about finding a sw_msi address and can check the user setting internally?
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 8a790e597e12..5d7f5ca1eecf 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1446,7 +1446,9 @@ int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, if (sw_msi_start && resv->type == IOMMU_RESV_MSI) num_hw_msi++; if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
*sw_msi_start = resv->start;
/* Bypass the driver-defined SW_MSI region, if preset
*/
if (*sw_msi_start == PHYS_ADDR_MAX)
*sw_msi_start = resv->start;
the code is not about bypass. Instead it's to use the driver-defined region if user doesn't set it.