vgpu ppgtt notification was split into 2 steps, the first step is to update PVINFO's pdp register and then write PVINFO's g2v_notify register with action code to tirgger ppgtt notification to GVT side.
currently these steps were not atomic operations due to no any protection, so it is easy to enter race condition state during the MTBF, stress and IGT test to cause GPU hang.
the solution is to add a lock to make vgpu ppgtt notication as atomic operation.
Cc: stable@vger.kernel.org Signed-off-by: Xiaolin Zhang xiaolin.zhang@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ drivers/gpu/drm/i915/i915_vgpu.c | 1 + 3 files changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb31c16..32e17c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking { };
struct i915_virtual_gpu { + struct mutex lock; bool active; u32 caps; }; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2cd2dab..ff0b178 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -833,6 +833,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) enum vgt_g2v_type msg; int i;
+ mutex_lock(&dev_priv->vgpu.lock); + if (create) atomic_inc(px_used(ppgtt->pd)); /* never remove */ else @@ -860,6 +862,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
I915_WRITE(vgtif_reg(g2v_notify), msg);
+ mutex_unlock(&dev_priv->vgpu.lock); + return 0; }
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index bf2b837..7493544 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -94,6 +94,7 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv) dev_priv->vgpu.caps = readl(shared_area + vgtif_offset(vgt_caps));
dev_priv->vgpu.active = true; + mutex_init(&dev_priv->vgpu.lock); DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
out:
Quoting Xiaolin Zhang (2019-08-23 07:57:31)
vgpu ppgtt notification was split into 2 steps, the first step is to update PVINFO's pdp register and then write PVINFO's g2v_notify register with action code to tirgger ppgtt notification to GVT side.
currently these steps were not atomic operations due to no any protection, so it is easy to enter race condition state during the MTBF, stress and IGT test to cause GPU hang.
the solution is to add a lock to make vgpu ppgtt notication as atomic operation.
Cc: stable@vger.kernel.org Signed-off-by: Xiaolin Zhang xiaolin.zhang@intel.com
drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ drivers/gpu/drm/i915/i915_vgpu.c | 1 + 3 files changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb31c16..32e17c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking { }; struct i915_virtual_gpu {
struct mutex lock; bool active; u32 caps;
}; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2cd2dab..ff0b178 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -833,6 +833,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) enum vgt_g2v_type msg; int i;
mutex_lock(&dev_priv->vgpu.lock);
if (create) atomic_inc(px_used(ppgtt->pd)); /* never remove */ else
@@ -860,6 +862,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) I915_WRITE(vgtif_reg(g2v_notify), msg);
How do you know the operation is complete and it is now safe to overwrite the data regs? -Chris
On 08/23/2019 03:58 PM, Chris Wilson wrote:
Quoting Xiaolin Zhang (2019-08-23 07:57:31)
vgpu ppgtt notification was split into 2 steps, the first step is to update PVINFO's pdp register and then write PVINFO's g2v_notify register with action code to tirgger ppgtt notification to GVT side.
currently these steps were not atomic operations due to no any protection, so it is easy to enter race condition state during the MTBF, stress and IGT test to cause GPU hang.
the solution is to add a lock to make vgpu ppgtt notication as atomic operation.
Cc: stable@vger.kernel.org Signed-off-by: Xiaolin Zhang xiaolin.zhang@intel.com
drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ drivers/gpu/drm/i915/i915_vgpu.c | 1 + 3 files changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb31c16..32e17c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking { }; struct i915_virtual_gpu {
struct mutex lock; bool active; u32 caps;
}; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2cd2dab..ff0b178 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -833,6 +833,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) enum vgt_g2v_type msg; int i;
mutex_lock(&dev_priv->vgpu.lock);
if (create) atomic_inc(px_used(ppgtt->pd)); /* never remove */ else
@@ -860,6 +862,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) I915_WRITE(vgtif_reg(g2v_notify), msg);
How do you know the operation is complete and it is now safe to overwrite the data regs? -Chris
by design, the data reg value is copied out to use, so as long as the action and data is operated together, how long the operation is not a issue and it is safe to overwrite the data regs with new action next time.
-BRs, Xiaolin
Quoting Zhang, Xiaolin (2019-08-23 09:07:01)
On 08/23/2019 03:58 PM, Chris Wilson wrote:
Quoting Xiaolin Zhang (2019-08-23 07:57:31)
vgpu ppgtt notification was split into 2 steps, the first step is to update PVINFO's pdp register and then write PVINFO's g2v_notify register with action code to tirgger ppgtt notification to GVT side.
currently these steps were not atomic operations due to no any protection, so it is easy to enter race condition state during the MTBF, stress and IGT test to cause GPU hang.
the solution is to add a lock to make vgpu ppgtt notication as atomic operation.
Cc: stable@vger.kernel.org Signed-off-by: Xiaolin Zhang xiaolin.zhang@intel.com
drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ drivers/gpu/drm/i915/i915_vgpu.c | 1 + 3 files changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb31c16..32e17c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking { }; struct i915_virtual_gpu {
struct mutex lock; bool active; u32 caps;
}; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2cd2dab..ff0b178 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -833,6 +833,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) enum vgt_g2v_type msg; int i;
mutex_lock(&dev_priv->vgpu.lock);
if (create) atomic_inc(px_used(ppgtt->pd)); /* never remove */ else
@@ -860,6 +862,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) I915_WRITE(vgtif_reg(g2v_notify), msg);
How do you know the operation is complete and it is now safe to overwrite the data regs? -Chris
by design, the data reg value is copied out to use, so as long as the action and data is operated together, how long the operation is not a issue and it is safe to overwrite the data regs with new action next time.
When and how quickly is it copied? Consider that it will be immediately overwritten by the next packet. Does the vgpu mmio write cause the calling CPU to be trapped? -Chris
On 2019.08.23 09:35:22 +0100, Chris Wilson wrote:
Quoting Zhang, Xiaolin (2019-08-23 09:07:01)
On 08/23/2019 03:58 PM, Chris Wilson wrote:
Quoting Xiaolin Zhang (2019-08-23 07:57:31)
vgpu ppgtt notification was split into 2 steps, the first step is to update PVINFO's pdp register and then write PVINFO's g2v_notify register with action code to tirgger ppgtt notification to GVT side.
currently these steps were not atomic operations due to no any protection, so it is easy to enter race condition state during the MTBF, stress and IGT test to cause GPU hang.
the solution is to add a lock to make vgpu ppgtt notication as atomic operation.
Cc: stable@vger.kernel.org Signed-off-by: Xiaolin Zhang xiaolin.zhang@intel.com
drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ drivers/gpu/drm/i915/i915_vgpu.c | 1 + 3 files changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb31c16..32e17c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking { }; struct i915_virtual_gpu {
struct mutex lock; bool active; u32 caps;
}; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2cd2dab..ff0b178 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -833,6 +833,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) enum vgt_g2v_type msg; int i;
mutex_lock(&dev_priv->vgpu.lock);
if (create) atomic_inc(px_used(ppgtt->pd)); /* never remove */ else
@@ -860,6 +862,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) I915_WRITE(vgtif_reg(g2v_notify), msg);
How do you know the operation is complete and it is now safe to overwrite the data regs? -Chris
by design, the data reg value is copied out to use, so as long as the action and data is operated together, how long the operation is not a issue and it is safe to overwrite the data regs with new action next time.
When and how quickly is it copied? Consider that it will be immediately overwritten by the next packet. Does the vgpu mmio write cause the calling CPU to be trapped?
yeah, mmio write will always be trapped, this data is got in notify mmio reg write trap handler. We need to add doc to tell that for current gvt notify interface, client needs to ensure serialized write but without data overwrite.
linux-stable-mirror@lists.linaro.org