get_modes() callback might be called asynchronously from the DRM core and
it is not synchronized with bridge_enable(), which sets proper runtime PM
state of the main DP device. Fix this by calling pm_runtime_get_sync()
before calling drm_get_edid(), which in turn calls drm_dp_i2c_xfer() and
analogix_dp_transfer() to ensure that main DP device is runtime active
when doing any access to its registers.
This fixes the following kernel issue on Samsung Exynos5250 Snow board:
Unhandled fault: imprecise external abort (0x406) at 0x00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: : 406 [#1] PREEMPT SMP ARM
Modules linked in:
CPU: 0 PID: 62 Comm: kworker/0:2 Not tainted 4.13.0-rc2-00364-g4a97a3da420b #3357
Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
Workqueue: events output_poll_execute
task: edc14800 task.stack: edcb2000
PC is at analogix_dp_transfer+0x15c/0x2fc
LR is at analogix_dp_transfer+0x134/0x2fc
pc : [<c0468538>] lr : [<c0468510>] psr: 60000013
sp : edcb3be8 ip : 0000002a fp : 00000001
r10: 00000000 r9 : edcb3cd8 r8 : edcb3c40
r7 : 00000000 r6 : edd3b380 r5 : edd3b010 r4 : 00000064
r3 : 00000000 r2 : f0ad3000 r1 : edcb3c40 r0 : edd3b010
Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
Control: 10c5387d Table: 4000406a DAC: 00000051
Process kworker/0:2 (pid: 62, stack limit = 0xedcb2210)
Stack: (0xedcb3be8 to 0xedcb4000)
[<c0468538>] (analogix_dp_transfer) from [<c0424ba4>] (drm_dp_i2c_do_msg+0x8c/0x2b4)
[<c0424ba4>] (drm_dp_i2c_do_msg) from [<c0424e64>] (drm_dp_i2c_xfer+0x98/0x214)
[<c0424e64>] (drm_dp_i2c_xfer) from [<c057b2d8>] (__i2c_transfer+0x140/0x29c)
[<c057b2d8>] (__i2c_transfer) from [<c057b4a4>] (i2c_transfer+0x70/0xe4)
[<c057b4a4>] (i2c_transfer) from [<c0441de4>] (drm_do_probe_ddc_edid+0xb4/0x114)
[<c0441de4>] (drm_do_probe_ddc_edid) from [<c0441e5c>] (drm_probe_ddc+0x18/0x28)
[<c0441e5c>] (drm_probe_ddc) from [<c0445728>] (drm_get_edid+0x124/0x2d4)
[<c0445728>] (drm_get_edid) from [<c0465ea0>] (analogix_dp_get_modes+0x90/0x114)
[<c0465ea0>] (analogix_dp_get_modes) from [<c0425e8c>] (drm_helper_probe_single_connector_modes+0x198/0x68c)
[<c0425e8c>] (drm_helper_probe_single_connector_modes) from [<c04325d4>] (drm_setup_crtcs+0x1b4/0xd18)
[<c04325d4>] (drm_setup_crtcs) from [<c04344a8>] (drm_fb_helper_hotplug_event+0x94/0xd0)
[<c04344a8>] (drm_fb_helper_hotplug_event) from [<c0425a50>] (drm_kms_helper_hotplug_event+0x24/0x28)
[<c0425a50>] (drm_kms_helper_hotplug_event) from [<c04263ec>] (output_poll_execute+0x6c/0x174)
[<c04263ec>] (output_poll_execute) from [<c0136f18>] (process_one_work+0x188/0x3fc)
[<c0136f18>] (process_one_work) from [<c01371f4>] (worker_thread+0x30/0x4b8)
[<c01371f4>] (worker_thread) from [<c013daf8>] (kthread+0x128/0x164)
[<c013daf8>] (kthread) from [<c0108510>] (ret_from_fork+0x14/0x24)
Code: 0a000002 ea000009 e2544001 0a00004a (e59537c8)
---[ end trace cddc7919c79f7878 ]---
Reported-by: Misha Komarovskiy <zombah(a)gmail.com>
CC: stable(a)vger.kernel.org # v4.10+
Signed-off-by: Marek Szyprowski <m.szyprowski(a)samsung.com>
---
This issue was there from the beginning, but recent changes to DRM
core revealed it. It makes sense to backport it to patch f0a8b49c03d2
("drm/bridge: analogix dp: Fix runtime PM state on driver bind"),
which fixed similar issue on driver bind, thus I've marked it for
stable v4.10+.
Best regards
Marek Szyprowski
Samsung R&D Institute Poland
---
drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 5dd3f1cd074a..a8905049b9da 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -946,7 +946,9 @@ static int analogix_dp_get_modes(struct drm_connector *connector)
return 0;
}
+ pm_runtime_get_sync(dp->dev);
edid = drm_get_edid(connector, &dp->aux.ddc);
+ pm_runtime_put(dp->dev);
if (edid) {
drm_mode_connector_update_edid_property(&dp->connector,
edid);
--
2.14.2
>From the shrinker paths, we want to relinquish the GPU and GGTT access to
the object, releasing the backing storage back to the system for
swapout. As a part of that process we would unpin the pages, marking
them for access by the CPU (for the swapout/swapin). However, if that
process was interrupted after unbind the vma, we missed a flush of the
inflight GGTT writes before we made that GTT space available again for
reuse, with the prospect that we would redirect them to another page.
The bug dates back to the introduction of multiple GGTT vma, but the
code itself dates to commit 02bef8f98d26 ("drm/i915: Unbind closed vma
for i915_gem_object_unbind()").
Fixes: 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()")
Fixes: c5ad54cf7dd8 ("drm/i915: Use partial view in mmap fault handler")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e083f242b8dc..80b78fb5daac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
* must wait for all rendering to complete to the object (as unbinding
* must anyway), and retire the requests.
*/
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
return ret;
- i915_gem_retire_requests(to_i915(obj->base.dev));
-
while ((vma = list_first_entry_or_null(&obj->vma_list,
struct i915_vma,
obj_link))) {
--
2.15.1
>From the shrinker paths, we want to relinquish the GPU and GGTT access to
the object, releasing the backing storage back to the system for
swapout. As a part of that process we would unpin the pages, marking
them for access by the CPU (for the swapout/swapin). However, if that
process was interrupted after unbind the vma, we missed a flush of the
inflight GGTT writes before we made that GTT space available again for
reuse, with the prospect that we would redirect them to another page.
The bug dates back to the introduction of multiple GGTT vma, but the
code itself dates to commit 02bef8f98d26 ("drm/i915: Unbind closed vma
for i915_gem_object_unbind()").
Fixes: 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()")
Fixes: c5ad54cf7dd8 ("drm/i915: Use partial view in mmap fault handler")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e083f242b8dc..80b78fb5daac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
* must wait for all rendering to complete to the object (as unbinding
* must anyway), and retire the requests.
*/
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
return ret;
- i915_gem_retire_requests(to_i915(obj->base.dev));
-
while ((vma = list_first_entry_or_null(&obj->vma_list,
struct i915_vma,
obj_link))) {
--
2.15.1
In
commit 613051dac40da1751ab269572766d3348d45a197
Author: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Date: Wed Dec 14 00:08:06 2016 +0100
drm: locking&new iterators for connector_list
we've went to extreme lengths to make sure connector iterations works
in any context, without introducing any additional locking context.
This worked, except for a small fumble in the implementation:
When we actually race with a concurrent connector unplug event, and
our temporary connector reference turns out to be the final one, then
everything breaks: We call the connector release function from
whatever context we happen to be in, which can be an irq/atomic
context. And connector freeing grabs all kinds of locks and stuff.
Fix this by creating a specially safe put function for connetor_iter,
which (in this rare case) punts the cleanup to a worker.
Reported-by: Ben Widawsky <ben(a)bwidawsk.net>
Cc: Ben Widawsky <ben(a)bwidawsk.net>
Fixes: 613051dac40d ("drm: locking&new iterators for connector_list")
Cc: Dave Airlie <airlied(a)gmail.com>
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Sean Paul <seanpaul(a)chromium.org>
Cc: <stable(a)vger.kernel.org> # v4.11+
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
---
drivers/gpu/drm/drm_connector.c | 28 ++++++++++++++++++++++++++--
drivers/gpu/drm/drm_mode_config.c | 2 ++
include/drm/drm_connector.h | 8 ++++++++
3 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 25f4b2e9a44f..482014137953 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -152,6 +152,16 @@ static void drm_connector_free(struct kref *kref)
connector->funcs->destroy(connector);
}
+static void drm_connector_free_work_fn(struct work_struct *work)
+{
+ struct drm_connector *connector =
+ container_of(work, struct drm_connector, free_work);
+ struct drm_device *dev = connector->dev;
+
+ drm_mode_object_unregister(dev, &connector->base);
+ connector->funcs->destroy(connector);
+}
+
/**
* drm_connector_init - Init a preallocated connector
* @dev: DRM device
@@ -181,6 +191,8 @@ int drm_connector_init(struct drm_device *dev,
if (ret)
return ret;
+ INIT_WORK(&connector->free_work, drm_connector_free_work_fn);
+
connector->base.properties = &connector->properties;
connector->dev = dev;
connector->funcs = funcs;
@@ -529,6 +541,18 @@ void drm_connector_list_iter_begin(struct drm_device *dev,
}
EXPORT_SYMBOL(drm_connector_list_iter_begin);
+/*
+ * Extra-safe connector put function that works in any context. Should only be
+ * used from the connector_iter functions, where we never really expect to
+ * actually release the connector when dropping our final reference.
+ */
+static void
+drm_connector_put_safe(struct drm_connector *conn)
+{
+ if (refcount_dec_and_test(&conn->base.refcount.refcount))
+ schedule_work(&conn->free_work);
+}
+
/**
* drm_connector_list_iter_next - return next connector
* @iter: connectr_list iterator
@@ -561,7 +585,7 @@ drm_connector_list_iter_next(struct drm_connector_list_iter *iter)
spin_unlock_irqrestore(&config->connector_list_lock, flags);
if (old_conn)
- drm_connector_put(old_conn);
+ drm_connector_put_safe(old_conn);
return iter->conn;
}
@@ -580,7 +604,7 @@ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
{
iter->dev = NULL;
if (iter->conn)
- drm_connector_put(iter->conn);
+ drm_connector_put_safe(iter->conn);
lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
}
EXPORT_SYMBOL(drm_connector_list_iter_end);
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index 7623607c0f1e..346c19c6ce01 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -431,6 +431,8 @@ void drm_mode_config_cleanup(struct drm_device *dev)
drm_connector_put(connector);
}
drm_connector_list_iter_end(&conn_iter);
+ /* connector_iter drops references in a work item. */
+ flush_scheduled_work();
if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) {
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter)
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 66d6c99d15e5..c5c753a1be85 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -926,6 +926,14 @@ struct drm_connector {
uint8_t num_h_tile, num_v_tile;
uint8_t tile_h_loc, tile_v_loc;
uint16_t tile_h_size, tile_v_size;
+
+ /**
+ * @free_work:
+ *
+ * Work used only by &drm_connector_iter to be able to clean up a
+ * connector from any context.
+ */
+ struct work_struct free_work;
};
#define obj_to_connector(x) container_of(x, struct drm_connector, base)
--
2.15.0
On 04.12.2017 23:10, rwarsow(a)gmx.de wrote:
> Hallo
>
> someone and I got an regression with e1000e since kernel 4.14.3 and it seems there is 4.14.4 on the way without a fix.
>
>
> bug report is here:
>
> https://bugzilla.kernel.org/show_bug.cgi?id=198047
( added stable and netdev to CC )
Yes I have a box with e1000e and it seems something at least breaks NM after 4.14.3.
Interesting here , when using connman the connection is stable.
Regards,
Gabriel C