These patches are backports for the 5.10 stable tree. They are cherry-picked from 5.14 stable.
BugFix: https://bugzilla.kernel.org/show_bug.cgi?id=211277
James Zhu (3): drm/amdkfd: separate kfd_iommu_resume from kfd_resume drm/amdgpu: add amdgpu_amdkfd_resume_iommu drm/amdgpu: move iommu_resume before ip init/resume
Lang Yu (1): drm/amd/amdkfd: adjust dummy functions' placement
Yifan Zhang (2): drm/amdgpu: init iommu after amdkfd device init drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 97 ++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 145 ++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 15 ++- 4 files changed, 155 insertions(+), 110 deletions(-)
From: Lang Yu Lang.Yu@amd.com
commit cd63989e0e6aa2eb66b461f2bae769e2550e47ac upstream.
Move all the dummy functions in amdgpu_amdkfd.c to amdgpu_amdkfd.h as inline functions.
Signed-off-by: Lang Yu Lang.Yu@amd.com Suggested-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Huang Rui ray.huang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: James Zhu James.Zhu@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 87 ------------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 138 ++++++++++++++++++--- 2 files changed, 119 insertions(+), 106 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0544460653b9..b23b31dc570e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -47,12 +47,8 @@ int amdgpu_amdkfd_init(void) amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; amdgpu_amdkfd_total_mem_size *= si.mem_unit;
-#ifdef CONFIG_HSA_AMD ret = kgd2kfd_init(); amdgpu_amdkfd_gpuvm_init_mem_limits(); -#else - ret = -ENOENT; -#endif kfd_initialized = !ret;
return ret; @@ -695,86 +691,3 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
return adev->have_atomics_support; } - -#ifndef CONFIG_HSA_AMD -bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) -{ - return false; -} - -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) -{ -} - -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) -{ - return 0; -} - -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ -} - -struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) -{ - return NULL; -} - -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) -{ - return 0; -} - -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - unsigned int asic_type, bool vf) -{ - return NULL; -} - -bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, - const struct kgd2kfd_shared_resources *gpu_resources) -{ - return false; -} - -void kgd2kfd_device_exit(struct kfd_dev *kfd) -{ -} - -void kgd2kfd_exit(void) -{ -} - -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) -{ -} - -int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) -{ - return 0; -} - -int kgd2kfd_pre_reset(struct kfd_dev *kfd) -{ - return 0; -} - -int kgd2kfd_post_reset(struct kfd_dev *kfd) -{ - return 0; -} - -void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) -{ -} - -void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) -{ -} - -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) -{ -} -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ea391ca7f2f1..a81d9cacf9b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -94,11 +94,6 @@ enum kgd_engine_type { KGD_ENGINE_MAX };
-struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); -bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); -struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
struct amdkfd_process_info { /* List head of all VMs that belong to a KFD process */ @@ -132,8 +127,6 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); - -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); @@ -153,6 +146,38 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit);
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); +#if IS_ENABLED(CONFIG_HSA_AMD) +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); +#else +static inline +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + return false; +} + +static inline +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + return NULL; +} + +static inline +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +{ + return 0; +} + +static inline +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +{ + return 0; +} +#endif /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -215,8 +240,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, void **vm, void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); @@ -236,23 +259,43 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, struct kgd_mem *mem, void **kptr, uint64_t *size); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); - int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); - int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dmabuf, uint64_t va, void *vm, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); - -void 
amdgpu_amdkfd_gpuvm_init_mem_limits(void); -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); - int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, struct tile_config *config); +#if IS_ENABLED(CONFIG_HSA_AMD) +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm); +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +#else +static inline +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ +}
+static inline +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ +} + +static inline +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) +{ +} +#endif /* KGD2KFD callbacks */ +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence); +#if IS_ENABLED(CONFIG_HSA_AMD) int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, @@ -266,11 +309,68 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); -int kgd2kfd_quiesce_mm(struct mm_struct *mm); -int kgd2kfd_resume_mm(struct mm_struct *mm); -int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, - struct dma_fence *fence); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); +#else +static inline int kgd2kfd_init(void) +{ + return -ENOENT; +}
+static inline void kgd2kfd_exit(void) +{ +} + +static inline +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, + unsigned int asic_type, bool vf) +{ + return NULL; +} + +static inline +bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, + const struct kgd2kfd_shared_resources *gpu_resources) +{ + return false; +} + +static inline void kgd2kfd_device_exit(struct kfd_dev *kfd) +{ +} + +static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +{ +} + +static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) +{ + return 0; +} + +static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kgd2kfd_post_reset(struct kfd_dev *kfd) +{ + return 0; +} + +static inline +void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) +{ +} + +static inline +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ +} + +static inline +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +{ +} +#endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */
commit fefc01f042f44ede373ee66773b8238dd8fdcb55 upstream.
Separate kfd_iommu_resume from kfd_resume for fine-tuning of amdgpu device init/resume/reset/recovery sequence.
v2: squash in fix for !CONFIG_HSA_AMD
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Signed-off-by: James Zhu James.Zhu@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 ++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a81d9cacf9b8..8a402a3df412 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -305,6 +305,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); +int kgd2kfd_resume_iommu(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -343,6 +344,11 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { }
+static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd) +{ + return 0; +} + static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) { return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5751bddc9cad..1204dae85797 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -896,17 +896,21 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) return ret; }
-static int kfd_resume(struct kfd_dev *kfd) +int kgd2kfd_resume_iommu(struct kfd_dev *kfd) { int err = 0;
err = kfd_iommu_resume(kfd); - if (err) { + if (err) dev_err(kfd_device, "Failed to resume IOMMU for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - return err; - } + return err; +} + +static int kfd_resume(struct kfd_dev *kfd) +{ + int err = 0;
err = kfd->dqm->ops.start(kfd->dqm); if (err) {
commit 8066008482e533e91934bee49765bf8b4a7c40db upstream.
Add amdgpu_amdkfd_resume_iommu for amdgpu.
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Signed-off-by: James Zhu James.Zhu@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + 2 files changed, 11 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b23b31dc570e..fb6230c62daa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -190,6 +190,16 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) kgd2kfd_suspend(adev->kfd.dev, run_pm); }
+int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd.dev) + r = kgd2kfd_resume_iommu(adev->kfd.dev); + + return r; +} + int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) { int r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8a402a3df412..32e385f287cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -121,6 +121,7 @@ int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void);
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); +int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry);
commit f02abeb0779700c308e661a412451b38962b8a0b upstream.
Separate iommu_resume from kfd_resume, and move it before other amdgpu ip init/resume.
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Signed-off-by: James Zhu James.Zhu@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 97723f2b5ece..2947bded074a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2220,6 +2220,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed;
+ r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -2913,6 +2917,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r;
+ r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; @@ -4296,6 +4304,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (!r) { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_amdkfd_resume_iommu(tmp_adev); + if (r) + goto out; + r = amdgpu_device_ip_resume_phase1(tmp_adev); if (r) goto out;
From: Yifan Zhang yifan1.zhang@amd.com
[ Upstream commit 714d9e4574d54596973ee3b0624ee4a16264d700 ]
This patch is to fix clinfo failure in Raven/Picasso:
Number of platforms: 1 Platform Profile: FULL_PROFILE Platform Version: OpenCL 2.2 AMD-APP (3364.0) Platform Name: AMD Accelerated Parallel Processing Platform Vendor: Advanced Micro Devices, Inc. Platform Extensions: cl_khr_icd cl_amd_event_callback
Platform Name: AMD Accelerated Parallel Processing Number of devices: 0
Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Tested-by: James Zhu James.Zhu@amd.com Acked-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: James Zhu James.Zhu@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2947bded074a..488e574f5da1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2220,10 +2220,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed;
- r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - goto init_failed; - r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -2259,6 +2255,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
+ r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + amdgpu_fru_get_product_info(adev);
init_failed:
From: Yifan Zhang yifan1.zhang@amd.com
commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 init will fail. But this failure should not block amdgpu driver init.
Reported-by: youling youling257@gmail.com Tested-by: youling youling257@gmail.com Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 488e574f5da1..f262c4e7a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - goto init_failed; - amdgpu_fru_get_product_info(adev);
init_failed: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1204dae85797..b35f0af71f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_cwsr_init(kfd);
+ if (kgd2kfd_resume_iommu(kfd)) + goto device_iommu_error; + if (kfd_resume(kfd)) goto kfd_resume_error;
On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
From: Yifan Zhang yifan1.zhang@amd.com
commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 init will fail. But this failure should not block amdgpu driver init.
Reported-by: youling youling257@gmail.com Tested-by: youling youling257@gmail.com Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 488e574f5da1..f262c4e7a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- goto init_failed;
-
amdgpu_fru_get_product_info(adev);
init_failed: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1204dae85797..b35f0af71f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd);
+ if (kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd)) goto kfd_resume_error;
2.25.1
Like I said last time, do not change the backport unless you HAVE to. You did it here again for no good reason :(
greg k-h
On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
From: Yifan Zhang yifan1.zhang@amd.com
commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 init will fail. But this failure should not block amdgpu driver init.
Reported-by: youling youling257@gmail.com Tested-by: youling youling257@gmail.com Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 488e574f5da1..f262c4e7a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- goto init_failed;
-
amdgpu_fru_get_product_info(adev);
init_failed: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1204dae85797..b35f0af71f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd);
+ if (kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd)) goto kfd_resume_error;
2.25.1
Like I said last time, do not change the backport unless you HAVE to. You did it here again for no good reason :(
[JZ] Yes, I should add more explanation next time.
Backport conflict fix: remove svm_migrate_init((struct amdgpu_device *)kfd->kgd);
The new AMD SVM feature has not been added to 5.10, so it is safe to remove it.
greg k-h
On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
From: Yifan Zhang yifan1.zhang@amd.com
commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 init will fail. But this failure should not block amdgpu driver init.
Reported-by: youling youling257@gmail.com Tested-by: youling youling257@gmail.com Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 488e574f5da1..f262c4e7a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- goto init_failed;
-
amdgpu_fru_get_product_info(adev); init_failed:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1204dae85797..b35f0af71f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd);
+ if (kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd)) goto kfd_resume_error;
-- 2.25.1
Like I said last time, do not change the backport unless you HAVE to. You did it here again for no good reason :(
[JZ] Yes, I should add more explanation next time.
Backport conflict fix to remove svm_migrate_init((struct amdgpu_device *)kfd->kgd);
new AMD svm feature has not been added for 5.10 So it is safe to remove it.
No, I am talking about the fact that you fixed up a coding style fix in this backport that is not in the original commit in Linus's tree.
On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
From: Yifan Zhang yifan1.zhang@amd.com
commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 init will fail. But this failure should not block amdgpu driver init.
Reported-by: youling youling257@gmail.com Tested-by: youling youling257@gmail.com Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 488e574f5da1..f262c4e7a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- goto init_failed;
-
amdgpu_fru_get_product_info(adev); init_failed:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1204dae85797..b35f0af71f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd);
+ if (kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd)) goto kfd_resume_error;
-- 2.25.1
Like I said last time, do not change the backport unless you HAVE to. You did it here again for no good reason :(
[JZ] Yes, I should add more explanation next time.
Backport conflict fix to remove svm_migrate_init((struct amdgpu_device *)kfd->kgd);
new AMD svm feature has not been added for 5.10 So it is safe to remove it.
No, I am talking about the fact that you fixed up a coding style fix in this backport that is not in the original commit in Linus's tree.
[JZ] I see. This fix is not necessary. Do you want me to send a v2 of the backport with this unnecessary coding-style fix dropped?
On Fri, Dec 10, 2021 at 09:46:08AM -0500, James Zhu wrote:
On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
From: Yifan Zhang yifan1.zhang@amd.com
commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 init will fail. But this failure should not block amdgpu driver init.
Reported-by: youling youling257@gmail.com Tested-by: youling youling257@gmail.com Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 488e574f5da1..f262c4e7a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- goto init_failed;
-
amdgpu_fru_get_product_info(adev); init_failed:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1204dae85797..b35f0af71f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd);
+ if (kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd)) goto kfd_resume_error;
-- 2.25.1
Like I said last time, do not change the backport unless you HAVE to. You did it here again for no good reason :(
[JZ] Yes, I should add more explanation next time.
Backport conflict fix to remove svm_migrate_init((struct amdgpu_device *)kfd->kgd);
new AMD svm feature has not been added for 5.10 So it is safe to remove it.
No, I am talking about the fact that you fixed up a coding style fix in this backport that is not in the original commit in Linus's tree.
[JZ] I see. this fix is not necessary. Do you want me to send v2 with
this unnecessary coding style fix dropping for backport?
I took what was in Linus's tree already. Please verify that what I applied to the queue still works.
thanks,
greg k-h
On 2021-12-10 10:12 a.m., Greg Kroah-Hartman wrote:
On Fri, Dec 10, 2021 at 09:46:08AM -0500, James Zhu wrote:
On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
From: Yifan Zhang yifan1.zhang@amd.com
commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 init will fail. But this failure should not block amdgpu driver init.
Reported-by: youling youling257@gmail.com Tested-by: youling youling257@gmail.com Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: James Zhu James.Zhu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: James Zhu James.Zhu@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 488e574f5da1..f262c4e7a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- goto init_failed;
-
amdgpu_fru_get_product_info(adev); init_failed:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1204dae85797..b35f0af71f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd);
+ if (kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd)) goto kfd_resume_error;
-- 2.25.1
Like I said last time, do not change the backport unless you HAVE to. You did it here again for no good reason :(
[JZ] Yes, I should add more explanation next time.
Backport conflict fix to remove svm_migrate_init((struct amdgpu_device *)kfd->kgd);
new AMD svm feature has not been added for 5.10 So it is safe to remove it.
No, I am talking about the fact that you fixed up a coding style fix in this backport that is not in the original commit in Linus's tree.
[JZ] I see. this fix is not necessary. Do you want me to send v2 with
this unnecessary coding style fix dropping for backport?
I took what was in Linus's tree already. Please verify that what I applied to the queue still works.
[JZ] I verified it. It still works fine. Thanks for the correction!
thanks,
greg k-h
On Thu, Dec 09, 2021 at 05:09:50PM -0500, James Zhu wrote:
These patches are back port for 5.10 stable. They are cherry-picked from 5.14 stable.
BugFix: https://bugzilla.kernel.org/show_bug.cgi?id=211277
James Zhu (3): drm/amdkfd: separate kfd_iommu_resume from kfd_resume drm/amdgpu: add amdgpu_amdkfd_resume_iommu drm/amdgpu: move iommu_resume before ip init/resume
Lang Yu (1): drm/amd/amdkfd: adjust dummy functions' placement
Yifan Zhang (2): drm/amdgpu: init iommu after amdkfd device init drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
What has changed from the last time this series was submitted?
thanks,
greg k-h
linux-stable-mirror@lists.linaro.org