The patch below does not apply to the 4.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 764baba80168ad3adafb521d2ab483ccbc49e344 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 4 Feb 2018 15:35:09 +0200
Subject: [PATCH] ovl: hash non-dir by lower inode for fsnotify
Commit 31747eda41ef ("ovl: hash directory inodes for fsnotify")
fixed an issue where an inotify watch on a directory stops getting
events after dentry caches are dropped.
A similar issue exists for non-dir non-upper files, for example:
$ mkdir -p lower upper work merged
$ touch lower/foo
$ mount -t overlay -o lowerdir=lower,workdir=work,upperdir=upper none merged
$ inotifywait merged/foo &
$ echo 2 > /proc/sys/vm/drop_caches
$ cat merged/foo
inotifywait doesn't get the OPEN event, because ovl_lookup() called
from 'cat' allocates a new overlay inode and does not reuse the
watched inode.
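For context, a minimal userspace sketch of the watch step, written
against the standard inotify API (an illustration equivalent to the
inotifywait call above, not part of the patch; the merged/foo path is
taken from the reproducer):

	#include <stdio.h>
	#include <sys/inotify.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		int fd, wd;

		fd = inotify_init1(0);
		if (fd < 0)
			return 1;

		/* Watch the overlay file for open events, as inotifywait does. */
		wd = inotify_add_watch(fd, "merged/foo", IN_OPEN);
		if (wd < 0)
			return 1;

		/* Blocks until an event arrives. Before the fix, the OPEN
		 * event from 'cat merged/foo' never shows up once dentry
		 * caches are dropped, because ovl_lookup() allocates a new
		 * overlay inode instead of reusing the watched one.
		 */
		if (read(fd, buf, sizeof(buf)) > 0)
			printf("got inotify event(s)\n");

		close(fd);
		return 0;
	}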
Fix this by hashing non-dir overlay inodes by lower real inode in
the following cases that were not hashed before this change:
- A non-upper overlay mount
- A lower non-hardlink when index=off
A helper ovl_hash_bylower() was added to put all the logic and
documentation about which real inode an overlay inode is hashed by
into one place.
The issue dates back to the initial version of overlayfs, but this
patch depends on ovl_inode code that was introduced in kernel v4.13.
Cc: <stable@vger.kernel.org> #v4.13
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index fcd97b783fa1..3b1bd469accd 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -669,38 +669,59 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
return inode;
}
+/*
+ * Does overlay inode need to be hashed by lower inode?
+ */
+static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
+ struct dentry *lower, struct dentry *index)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ /* No, if pure upper */
+ if (!lower)
+ return false;
+
+ /* Yes, if already indexed */
+ if (index)
+ return true;
+
+ /* Yes, if won't be copied up */
+ if (!ofs->upper_mnt)
+ return true;
+
+ /* No, if lower hardlink is or will be broken on copy up */
+ if ((upper || !ovl_indexdir(sb)) &&
+ !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+ return false;
+
+ /* No, if non-indexed upper with NFS export */
+ if (sb->s_export_op && upper)
+ return false;
+
+ /* Otherwise, hash by lower inode for fsnotify */
+ return true;
+}
+
struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
struct dentry *lowerdentry, struct dentry *index,
unsigned int numlower)
{
- struct ovl_fs *ofs = sb->s_fs_info;
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
- /* Already indexed or could be indexed on copy up? */
- bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
- struct dentry *origin = indexed ? lowerdentry : NULL;
+ bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
bool is_dir;
- if (WARN_ON(upperdentry && indexed && !lowerdentry))
- return ERR_PTR(-EIO);
-
if (!realinode)
realinode = d_inode(lowerdentry);
/*
- * Copy up origin (lower) may exist for non-indexed non-dir upper, but
- * we must not use lower as hash key in that case.
- * Hash non-dir that is or could be indexed by origin inode.
- * Hash dir that is or could be merged by origin inode.
- * Hash pure upper and non-indexed non-dir by upper inode.
- * Hash non-indexed dir by upper inode for NFS export.
+ * Copy up origin (lower) may exist for non-indexed upper, but we must
+ * not use lower as hash key if this is a broken hardlink.
*/
is_dir = S_ISDIR(realinode->i_mode);
- if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
- origin = lowerdentry;
-
- if (upperdentry || origin) {
- struct inode *key = d_inode(origin ?: upperdentry);
+ if (upperdentry || bylower) {
+ struct inode *key = d_inode(bylower ? lowerdentry :
+ upperdentry);
unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
inode = iget5_locked(sb, (unsigned long) key,
@@ -728,6 +749,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
set_nlink(inode, nlink);
} else {
+ /* Lower hardlink that will be broken on copy up */
inode = new_inode(sb);
if (!inode)
goto out_nomem;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d61a5c1063515e855bedb1b81e20e50b0ac3541e Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sun, 11 Feb 2018 10:38:28 +0100
Subject: [PATCH] drm/nouveau: Fix deadlock on runtime suspend
nouveau's ->runtime_suspend hook calls drm_kms_helper_poll_disable(),
which waits for the output poll worker to finish if it's running.
The output poll worker meanwhile calls pm_runtime_get_sync() in
nouveau_connector_detect() which waits for the ongoing suspend to finish,
causing a deadlock.
Fix by not acquiring a runtime PM ref if nouveau_connector_detect() is
called in the output poll worker's context. This is safe because
the poll worker is only enabled while runtime active and we know that
->runtime_suspend waits for it to finish.
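For reference, the new check relies on the two dependency commits listed
in the stable tags below; roughly, the helper identifies the poll worker
by looking at the work item the current kworker is executing (a sketch
based on those dependency commits, not code from this patch):

	bool drm_kms_helper_is_poll_worker(void)
	{
		/* current_work() (added by "workqueue: Allow retrieval of
		 * current task's work struct") returns the work item this
		 * kworker is running, or NULL outside workqueue context.
		 */
		struct work_struct *work = current_work();

		return work && work->func == output_poll_execute;
	}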
Other contexts calling nouveau_connector_detect() do require a runtime
PM ref; these comprise:
status_store() drm sysfs interface
->fill_modes drm callback
drm_fb_helper_probe_connector_modes()
drm_mode_getconnector()
nouveau_connector_hotplug()
nouveau_display_hpd_work()
nv17_tv_set_property()
Stack trace for posterity:
INFO: task kworker/0:1:58 blocked for more than 120 seconds.
Workqueue: events output_poll_execute [drm_kms_helper]
Call Trace:
schedule+0x28/0x80
rpm_resume+0x107/0x6e0
__pm_runtime_resume+0x47/0x70
nouveau_connector_detect+0x7e/0x4a0 [nouveau]
nouveau_connector_detect_lvds+0x132/0x180 [nouveau]
drm_helper_probe_detect_ctx+0x85/0xd0 [drm_kms_helper]
output_poll_execute+0x11e/0x1c0 [drm_kms_helper]
process_one_work+0x184/0x380
worker_thread+0x2e/0x390
INFO: task kworker/0:2:252 blocked for more than 120 seconds.
Workqueue: pm pm_runtime_work
Call Trace:
schedule+0x28/0x80
schedule_timeout+0x1e3/0x370
wait_for_completion+0x123/0x190
flush_work+0x142/0x1c0
nouveau_pmops_runtime_suspend+0x7e/0xd0 [nouveau]
pci_pm_runtime_suspend+0x5c/0x180
vga_switcheroo_runtime_suspend+0x1e/0xa0
__rpm_callback+0xc1/0x200
rpm_callback+0x1f/0x70
rpm_suspend+0x13c/0x640
pm_runtime_work+0x6e/0x90
process_one_work+0x184/0x380
worker_thread+0x2e/0x390
Bugzilla: https://bugs.archlinux.org/task/53497
Bugzilla: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=870523
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70388#c33
Fixes: 5addcf0a5f0f ("nouveau: add runtime PM support (v0.9)")
Cc: stable@vger.kernel.org # v3.12+: 27d4ee03078a: workqueue: Allow retrieval of current task's work struct
Cc: stable@vger.kernel.org # v3.12+: 25c058ccaf2e: drm: Allow determining if current task is output poll worker
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Dave Airlie <airlied@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://patchwork.freedesktop.org/patch/msgid/b7d2cbb609a80f59ccabfdf479b9d…
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 69d6e61a01ec..6ed9cb053dfa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -570,9 +570,15 @@ nouveau_connector_detect(struct drm_connector *connector, bool force)
nv_connector->edid = NULL;
}
- ret = pm_runtime_get_sync(connector->dev->dev);
- if (ret < 0 && ret != -EACCES)
- return conn_status;
+ /* Outputs are only polled while runtime active, so acquiring a
+ * runtime PM ref here is unnecessary (and would deadlock upon
+ * runtime suspend because it waits for polling to finish).
+ */
+ if (!drm_kms_helper_is_poll_worker()) {
+ ret = pm_runtime_get_sync(connector->dev->dev);
+ if (ret < 0 && ret != -EACCES)
+ return conn_status;
+ }
nv_encoder = nouveau_connector_ddc_detect(connector);
if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) {
@@ -647,8 +653,10 @@ nouveau_connector_detect(struct drm_connector *connector, bool force)
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return conn_status;
}
The patch below does not apply to the 4.4, 4.9, 4.14, or 4.15-stable trees.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5e1df40f40ee45a97bb1066c3d71f0ae920a9672 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 30 Jan 2018 16:29:38 +0200
Subject: [PATCH] drm/i915/bxt, glk: Increase PCODE timeouts during CDCLK freq
changing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Currently we see sporadic timeouts during CDCLK changing both on BXT and
GLK, as reported in the Bugzilla ticket referenced below. It's easy to reproduce this by
changing the frequency in a tight loop after blanking the display. The
upper bound for the completion time is 800us based on my tests, so
increase it from the current 500us to 2ms; with that I couldn't trigger
the problem either on BXT or GLK.
Note that timeouts happened during both the change notification and the
voltage level setting PCODE request. (For the latter one BSpec doesn't
require us to wait for completion before further HW programming.)
This issue is similar to
commit 2c7d0602c815 ("drm/i915/gen9: Fix PCODE polling during CDCLK
change notification")
but there the PCODE request does complete (as shown by the mbox
busy flag); only the reply we get from PCODE indicates a failure.
So there we keep resending the request until a success reply, here we
just have to increase the timeout for the one PCODE request we send.
v2:
- s/snb_pcode_request/sandybridge_pcode_write_timeout/ (Ville)
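Condensed, the fix splits the timeout out as an explicit parameter while
keeping every existing call site on the historical 500us default (this
just juxtaposes the i915_drv.h and intel_cdclk.c hunks below):

	/* New variant taking an explicit timeout... */
	int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
					    u32 mbox, u32 val, int timeout_us);

	/* ...existing callers keep the old 500us behaviour... */
	#define sandybridge_pcode_write(dev_priv, mbox, val) \
		sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500)

	/* ...and the BXT/GLK CDCLK path asks for 2ms explicitly. */
	ret = sandybridge_pcode_write_timeout(dev_priv,
					      HSW_PCODE_DE_WRITE_FREQ_REQ,
					      0x80000000, 2000);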
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.4+
Acked-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103326
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180130142939.17983-1-imre.d…
(cherry picked from commit e76019a81921e87a4d9e7b3d86102bc708a6c227)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index caebd5825279..a42deebedb0f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3717,7 +3717,11 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
struct intel_display_error_state *error);
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
-int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val);
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
+ u32 val, int timeout_us);
+#define sandybridge_pcode_write(dev_priv, mbox, val) \
+ sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500)
+
int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
u32 reply_mask, u32 reply, int timeout_base_ms);
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index d77e2bec1e29..5dc118f26b51 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1370,10 +1370,15 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
break;
}
- /* Inform power controller of upcoming frequency change */
+ /*
+ * Inform power controller of upcoming frequency change. BSpec
+ * requires us to wait up to 150usec, but that leads to timeouts;
+ * the 2ms used here is based on experiment.
+ */
mutex_lock(&dev_priv->pcu_lock);
- ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
- 0x80000000);
+ ret = sandybridge_pcode_write_timeout(dev_priv,
+ HSW_PCODE_DE_WRITE_FREQ_REQ,
+ 0x80000000, 2000);
mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
@@ -1404,8 +1409,15 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
I915_WRITE(CDCLK_CTL, val);
mutex_lock(&dev_priv->pcu_lock);
- ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
- cdclk_state->voltage_level);
+ /*
+ * The timeout isn't specified, the 2ms used here is based on
+ * experiment.
+ * FIXME: Waiting for the request completion could be delayed until
+ * the next PCODE request based on BSpec.
+ */
+ ret = sandybridge_pcode_write_timeout(dev_priv,
+ HSW_PCODE_DE_WRITE_FREQ_REQ,
+ cdclk_state->voltage_level, 2000);
mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1db79a860b96..1a6e699e19e0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -9149,8 +9149,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
return 0;
}
-int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
- u32 mbox, u32 val)
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+ u32 mbox, u32 val, int timeout_us)
{
int status;
@@ -9173,7 +9173,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
if (__intel_wait_for_register_fw(dev_priv,
GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
- 500, 0, NULL)) {
+ timeout_us, 0, NULL)) {
DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
val, mbox, __builtin_return_address(0));
return -ETIMEDOUT;
The patch below does not apply to the 4.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f616f2830c1ed79245cfeca900f7e8a3b3c08c06 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Thu, 1 Mar 2018 11:06:13 +0000
Subject: [PATCH] drm/i915/perf: fix perf stream opening lock
We're seeing on CI that some contexts don't have the OA period timer
programmed; this timer directs the OA unit on how often to write reports.
The issue is that we're not holding the drm lock from when we edit the
context images down to when we set the exclusive_stream variable. This
leaves a window for deferred context allocation to call
i915_oa_init_reg_state(), which will not program the expected OA timer
value because exclusive_stream has not been set yet.
v2: Drop need_lock from gen8_configure_all_contexts() (Matt)
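Condensed from the diff below, the corrected stream-open path takes
struct_mutex before programming the contexts and publishes
exclusive_stream inside the same critical section (simplified; error
paths elided):

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		goto err_lock;

	/* Edit all context images while holding the lock, so deferred
	 * context allocation cannot run i915_oa_init_reg_state() against
	 * a half-configured stream.
	 */
	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
						      stream->oa_config);
	if (ret)
		goto err_enable;

	/* Publish the stream under the same lock. */
	dev_priv->perf.oa.exclusive_stream = stream;
	mutex_unlock(&dev_priv->drm.struct_mutex);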
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Fixes: 701f8231a2f ("drm/i915/perf: prune OA configs")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102254
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103715
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103755
Link: https://patchwork.freedesktop.org/patch/msgid/20180301110613.1737-1-lionel.…
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v4.14+
(cherry picked from commit 41d3fdcd15d5ecf29cc73e8b79c2327ebb54b960)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0be50e43507d..f8fe5ffcdcff 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1303,9 +1303,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
*/
mutex_lock(&dev_priv->drm.struct_mutex);
dev_priv->perf.oa.exclusive_stream = NULL;
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
free_oa_buffer(dev_priv);
@@ -1756,22 +1755,13 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
* Note: it's only the RCS/Render context that has any OA state.
*/
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
- const struct i915_oa_config *oa_config,
- bool interruptible)
+ const struct i915_oa_config *oa_config)
{
struct i915_gem_context *ctx;
int ret;
unsigned int wait_flags = I915_WAIT_LOCKED;
- if (interruptible) {
- ret = i915_mutex_lock_interruptible(&dev_priv->drm);
- if (ret)
- return ret;
-
- wait_flags |= I915_WAIT_INTERRUPTIBLE;
- } else {
- mutex_lock(&dev_priv->drm.struct_mutex);
- }
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
/* Switch away from any user context. */
ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
@@ -1819,8 +1809,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
}
out:
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
return ret;
}
@@ -1863,7 +1851,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
+ ret = gen8_configure_all_contexts(dev_priv, oa_config);
if (ret)
return ret;
@@ -1878,7 +1866,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL, false);
+ gen8_configure_all_contexts(dev_priv, NULL);
I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
~GT_NOA_ENABLE));
@@ -1888,7 +1876,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL, false);
+ gen8_configure_all_contexts(dev_priv, NULL);
/* Make sure we disable noa to save power. */
I915_WRITE(RPM_CONFIG1,
@@ -2138,6 +2126,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (ret)
goto err_oa_buf_alloc;
+ ret = i915_mutex_lock_interruptible(&dev_priv->drm);
+ if (ret)
+ goto err_lock;
+
ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
stream->oa_config);
if (ret)
@@ -2145,23 +2137,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->ops = &i915_oa_stream_ops;
- /* Lock device for exclusive_stream access late because
- * enable_metric_set() might lock as well on gen8+.
- */
- ret = i915_mutex_lock_interruptible(&dev_priv->drm);
- if (ret)
- goto err_lock;
-
dev_priv->perf.oa.exclusive_stream = stream;
mutex_unlock(&dev_priv->drm.struct_mutex);
return 0;
-err_lock:
+err_enable:
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
-err_enable:
+err_lock:
free_oa_buffer(dev_priv);
err_oa_buf_alloc: