Hi Nicolin,
On 1/11/25 4:32 AM, Nicolin Chen wrote:
From: Jason Gunthorpe jgg@nvidia.com
All the iommu cases simply want to override the MSI page's address with
those which translate MSIs
the IOVA that was mapped through the iommu. This doesn't need a cookie pointer, we just need to store the IOVA and its page size in the msi_desc.
Instead provide msi_desc_set_iommu_msi_iova(), which allows the IOMMU side to specify the IOVA at which the MSI page is placed during iommu_dma_prepare(). This is stored in the msi_desc and then
iommu_dma_prepare_msi()
iommu_dma_compose_msi_msg() is a simple inline that sets address_hi/lo.
The next patch will correct the naming.
This is done because we cannot correctly lock access to group->domain in the atomic context that iommu_dma_compose_msi_msg() is called under. Today the locking miss is tolerable because dma-iommu.c operates under an assumption that the domain does not change while a driver is probed.
However, iommufd now permits the domain to change while the driver is probed, and VFIO userspace can create races with IRQ changes calling iommu_dma_prepare/compose_msi_msg() and changing/freeing the iommu_domain.
and is it safe in iommu_dma_prepare_msi()?
Removing the pointer, and critically, the call to iommu_get_domain_for_dev() during compose resolves this race.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
 include/linux/iommu.h     |  6 ------
 include/linux/msi.h       | 45 +++++++++++++++++++++++---------------
 drivers/iommu/dma-iommu.c | 30 +++++---------------------
 3 files changed, 32 insertions(+), 49 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 318d27841130..3a4215966c1b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1513,7 +1513,6 @@ static inline void iommu_debugfs_setup(void) {} int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); #else /* CONFIG_IOMMU_DMA */ @@ -1529,11 +1528,6 @@ static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_a { return 0; }
-static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{ -}
#endif /* CONFIG_IOMMU_DMA */ /* diff --git a/include/linux/msi.h b/include/linux/msi.h index b10093c4d00e..d442b4a69d56 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -184,7 +184,8 @@ struct msi_desc { struct msi_msg msg; struct irq_affinity_desc *affinity; #ifdef CONFIG_IRQ_MSI_IOMMU
- const void *iommu_cookie;
you may add kernel doc comments above
+	u64 iommu_msi_iova : 58;
+	u64 iommu_msi_page_shift : 6;
#endif #ifdef CONFIG_SYSFS struct device_attribute *sysfs_attrs; @@ -285,28 +286,36 @@ struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, #define msi_desc_to_dev(desc) ((desc)->dev) -#ifdef CONFIG_IRQ_MSI_IOMMU -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) -{
- return desc->iommu_cookie;
-}
-static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc,
const void *iommu_cookie)
+static inline void msi_desc_set_iommu_msi_iova(struct msi_desc *desc,
u64 msi_iova,
unsigned int page_shift)
{
- desc->iommu_cookie = iommu_cookie;
-} -#else -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) -{
- return NULL;
+#ifdef CONFIG_IRQ_MSI_IOMMU
+	desc->iommu_msi_iova = msi_iova >> page_shift;
+	desc->iommu_msi_page_shift = page_shift;
+#endif } -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc,
const void *iommu_cookie)
+/**
+ * iommu_dma_compose_msi_msg() - Apply translation to an MSI message
+ * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
+ * @msg: MSI message containing target physical address
+ */
+static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc,
struct msi_msg *msg)
{ -} +#ifdef CONFIG_IRQ_MSI_IOMMU
- if (desc->iommu_msi_page_shift) {
u64 msi_iova = desc->iommu_msi_iova
<< desc->iommu_msi_page_shift;
msg->address_hi = upper_32_bits(msi_iova);
msg->address_lo = lower_32_bits(msi_iova) |
(msg->address_lo &
((1 << desc->iommu_msi_page_shift) - 1));
- }
#endif +} int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, struct msi_desc *init_desc); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 2a9fa0c8cc00..bf91e014d179 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1815,7 +1815,7 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) static DEFINE_MUTEX(msi_prepare_lock); /* see below */ if (!domain || !domain->iova_cookie) {
-		desc->iommu_cookie = NULL;
+		msi_desc_set_iommu_msi_iova(desc, 0, 0);
 		return 0;
 	}
@@ -1827,33 +1827,13 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) mutex_lock(&msi_prepare_lock); msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); mutex_unlock(&msi_prepare_lock);
- msi_desc_set_iommu_cookie(desc, msi_page);
- if (!msi_page) return -ENOMEM;
- return 0;
-} -/**
- iommu_dma_compose_msi_msg() - Apply translation to an MSI message
- @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
- @msg: MSI message containing target physical address
- */
-void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{
- struct device *dev = msi_desc_to_dev(desc);
- const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- const struct iommu_dma_msi_page *msi_page;
- msi_page = msi_desc_get_iommu_cookie(desc);
- if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
return;
- msg->address_hi = upper_32_bits(msi_page->iova);
- msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
- msg->address_lo += lower_32_bits(msi_page->iova);
- msi_desc_set_iommu_msi_iova(
desc, msi_page->iova,
ilog2(cookie_msi_granule(domain->iova_cookie)));
- return 0;
} static int iommu_dma_init(void)