UCSI specification quite clearly states that if a command
can't be completed in 10ms, the firmware must notify
about BUSY condition. Unfortunately almost none of the
platforms (the firmware on them) generate the BUSY
notification even if a command can't be completed in time.
The driver already considered that, and used a timeout
value of 5 seconds, but processing especially the alternate
mode discovery commands often takes a considerable amount of
time in the firmware, much more than the 5 seconds. That
happens especially after bootup when devices are already
connected to the USB Type-C connector. Until now, on those
platforms the alternate mode discovery has simply failed
because of the timeout.
To improve the situation, increase the timeout value for
the command completion to 1 minute. That should give enough
time for even the slowest firmware to process the commands.
Fixes: f56de278e8ec ("usb: typec: ucsi: acpi: Move to the new API")
Cc: stable(a)vger.kernel.org
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index c0aca2f0f23f0..fbfe8f5933af8 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -78,7 +78,7 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
if (ret)
goto out_clear_bit;
- if (!wait_for_completion_timeout(&ua->complete, msecs_to_jiffies(5000)))
+ if (!wait_for_completion_timeout(&ua->complete, 60 * HZ))
ret = -ETIMEDOUT;
out_clear_bit:
--
2.28.0
This reverts commit 116ac378bb3ff844df333e7609e7604651a0db9d.
This commit causes the kernel to oops and reboot when injecting an SLB
multihit, which causes an MCE.
Before this commit, an SLB multihit was corrected by the kernel and the
system continued to operate normally.
cc: stable(a)vger.kernel.org
Fixes: 116ac378bb3f ("powerpc/64s: machine check interrupt update NMI accounting")
Signed-off-by: Michal Suchanek <msuchanek(a)suse.de>
---
arch/powerpc/kernel/mce.c | 7 -------
arch/powerpc/kernel/traps.c | 18 +++---------------
2 files changed, 3 insertions(+), 22 deletions(-)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ada59f6c4298..2e13528dcc92 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -591,14 +591,10 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
long notrace machine_check_early(struct pt_regs *regs)
{
long handled = 0;
- bool nested = in_nmi();
u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
this_cpu_set_ftrace_enabled(0);
- if (!nested)
- nmi_enter();
-
hv_nmi_check_nonrecoverable(regs);
/*
@@ -607,9 +603,6 @@ long notrace machine_check_early(struct pt_regs *regs)
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
- if (!nested)
- nmi_exit();
-
this_cpu_set_ftrace_enabled(ftrace_enabled);
return handled;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d1ebe152f210..7853b770918d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -827,19 +827,7 @@ void machine_check_exception(struct pt_regs *regs)
{
int recover = 0;
- /*
- * BOOK3S_64 does not call this handler as a non-maskable interrupt
- * (it uses its own early real-mode handler to handle the MCE proper
- * and then raises irq_work to call this handler when interrupts are
- * enabled).
- *
- * This is silly. The BOOK3S_64 should just call a different function
- * rather than expecting semantics to magically change. Something
- * like 'non_nmi_machine_check_exception()', perhaps?
- */
- const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64);
-
- if (nmi) nmi_enter();
+ nmi_enter();
__this_cpu_inc(irq_stat.mce_exceptions);
@@ -865,7 +853,7 @@ void machine_check_exception(struct pt_regs *regs)
if (check_io_access(regs))
goto bail;
- if (nmi) nmi_exit();
+ nmi_exit();
die("Machine check", regs, SIGBUS);
@@ -876,7 +864,7 @@ void machine_check_exception(struct pt_regs *regs)
return;
bail:
- if (nmi) nmi_exit();
+ nmi_exit();
}
void SMIException(struct pt_regs *regs)
--
2.28.0
On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is,
we see the GPU report more context-switch entries for us to parse, but
those entries have not been written, leading us to process stale events,
and eventually report a hung GPU.
However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that may be, we can set a reliable
upper bound for how long we should wait for:
513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Suggested-by: Bruce Chang <yu.bruce.chang(a)intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang(a)intel.com>
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v5.4
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d6e0f62337b4..d75712a503b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
*/
static inline bool gen12_csb_parse(const u64 *csb)
{
- u64 entry = READ_ONCE(*csb);
- bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
- bool new_queue =
+ bool ctx_away_valid;
+ bool new_queue;
+ u64 entry;
+
+ /* HSD#22011248461 */
+ entry = READ_ONCE(*csb);
+ if (unlikely(entry == -1)) {
+ preempt_disable();
+ if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+ GEM_WARN_ON("50us CSB timeout");
+ preempt_enable();
+ }
+ WRITE_ONCE(*(u64 *)csb, -1);
+
+ ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+ new_queue =
lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
/*
@@ -4004,6 +4017,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
+ /* Check that the GPU does indeed update the CSB entries! */
+ memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
invalidate_csb_entries(&execlists->csb_status[0],
&execlists->csb_status[reset_value]);
--
2.20.1
An unfortunate sequence of events, but it turns out there is a valid
usecase for being able to free/decouple the driver objects before they
are freed by the DRM core. In particular, if we have a pointer into a
drm core object from inside a driver object, that pointer needs to be
nerfed *before* it is freed so that concurrent access (e.g. debugfs)
does not follow the dangling pointer.
The legacy marker was added in the code movement from drm_fops.c to
drm_file.c.
References: 9acdac68bcdc ("drm: rename drm_fops.c to drm_file.c")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter(a)intel.com>
Cc: Gustavo Padovan <gustavo.padovan(a)collabora.com>
Cc: CQ Tang <cq.tang(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v4.12+
---
drivers/gpu/drm/drm_file.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 0ac4566ae3f4..7b4258d6f7cc 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -258,8 +258,7 @@ void drm_file_free(struct drm_file *file)
(long)old_encode_dev(file->minor->kdev->devt),
atomic_read(&dev->open_count));
- if (drm_core_check_feature(dev, DRIVER_LEGACY) &&
- dev->driver->preclose)
+ if (dev->driver->preclose)
dev->driver->preclose(dev, file);
if (drm_core_check_feature(dev, DRIVER_LEGACY))
--
2.20.1
On Wed, Sep 16, 2020 at 12:40:30AM +0100, Andrew Cooper wrote:
> It's worse than that. Even when stating that %rsp is modified in the
> asm, the generated code sequence is still buggy, for recent Clang and GCC.
>
> https://godbolt.org/z/ccz9v7
>
> It's clearly not safe to ever use memory operands with pushf/popf asm
> fragments.
So I went and singlestepped your snippet in gdb. And it all seems to
work - it is simply a bit confusing: :-)
eflags 0x246 [ PF ZF IF ]
=> 0x000055555555505d <main+13>: 9c pushfq
0x7fffffffe440: 0x00007fffffffe540 0x0000000000000000
0x7fffffffe450: 0x0000000000000000 0x00007ffff7e0ecca
0x7fffffffe460: 0x00007fffffffe548 0x00000001ffffe7c9
0x7fffffffe470: 0x0000555555555050 0x00007ffff7e0e8f8
0x7fffffffe480: 0x0000000000000000 0x0c710afd7e78681b
those lines under the "=>" line are the stack contents printed with
$ x/10gx $sp
Then, we will pop into 0x8(%rsp):
=> 0x55555555505e <main+14>: popq 0x8(%rsp)
0x7fffffffe438: 0x0000000000000346 0x00007fffffffe540
0x7fffffffe448: 0x0000000000000000 0x0000000000000000
0x7fffffffe458: 0x00007ffff7e0ecca 0x00007fffffffe548
0x7fffffffe468: 0x00000001ffffe7c9 0x0000555555555050
0x7fffffffe478: 0x00007ffff7e0e8f8 0x0000000000000000
Now, POP copies the value pointed to by %rsp, *increments* the stack
pointer and *then* computes the effective address of the operand. It
says so in the SDM too (thanks peterz!):
"If the ESP register is used as a base register for addressing a
destination operand in memory, the POP instruction computes the
effective address of the operand after it increments the ESP register."
*That's* why FLAGS is at 0x7fffffffe448, which is %rsp + 8.
Basically flags is there *twice* on the stack:
(gdb) x/10x 0x7fffffffe438
0x7fffffffe438: 0x0000000000000346 0x00007fffffffe540
^^^^^^^^^^^^^^^^^^
0x7fffffffe448: 0x0000000000000346 0x0000000000000000
^^^^^^^^^^^^^^^^^^
0x7fffffffe458: 0x00007ffff7e0ecca 0x00007fffffffe548
0x7fffffffe468: 0x00000001ffffe7c9 0x0000555555555050
0x7fffffffe478: 0x00007ffff7e0e8f8 0x0000000000000000
and now we read the second copy into %rsi.
=> 0x555555555062 <main+18>: mov 0x8(%rsp),%rsi
0x7fffffffe440: 0x00007fffffffe540 0x0000000000000346
0x7fffffffe450: 0x0000000000000000 0x00007ffff7e0ecca
0x7fffffffe460: 0x00007fffffffe548 0x00000001ffffe7c9
0x7fffffffe470: 0x0000555555555050 0x00007ffff7e0e8f8
0x7fffffffe480: 0x0000000000000000 0x0c710afd7e78681b
Looks like it works as designed.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Sometimes the embedded controller firmware does not
terminate the list of alternate modes that the partner
supports in its response to the GET_ALTERNATE_MODES command.
Instead the firmware returns the supported alternate modes
over and over again until the driver stops requesting them.
If that happens, the number of modes for each alternate mode
will exceed the maximum 6 that is defined in the USB Power
Delivery specification. Make sure that can't happen by
adding a check for it.
This fixes a NULL pointer dereference that is caused by the
overrun.
Fixes: ad74b8649beaf ("usb: typec: ucsi: Preliminary support for alternate modes")
Cc: stable(a)vger.kernel.org
Reported-by: Zwane Mwaikambo <zwanem(a)gmail.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/usb/typec/ucsi/ucsi.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index e680fcfdee609..758b988ac518a 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -216,14 +216,18 @@ void ucsi_altmode_update_active(struct ucsi_connector *con)
con->partner_altmode[i] == altmode);
}
-static u8 ucsi_altmode_next_mode(struct typec_altmode **alt, u16 svid)
+static int ucsi_altmode_next_mode(struct typec_altmode **alt, u16 svid)
{
u8 mode = 1;
int i;
- for (i = 0; alt[i]; i++)
+ for (i = 0; alt[i]; i++) {
+ if (i > MODE_DISCOVERY_MAX)
+ return -ERANGE;
+
if (alt[i]->svid == svid)
mode++;
+ }
return mode;
}
@@ -258,8 +262,11 @@ static int ucsi_register_altmode(struct ucsi_connector *con,
goto err;
}
- desc->mode = ucsi_altmode_next_mode(con->port_altmode,
- desc->svid);
+ ret = ucsi_altmode_next_mode(con->port_altmode, desc->svid);
+ if (ret < 0)
+ return ret;
+
+ desc->mode = ret;
switch (desc->svid) {
case USB_TYPEC_DP_SID:
@@ -292,8 +299,11 @@ static int ucsi_register_altmode(struct ucsi_connector *con,
goto err;
}
- desc->mode = ucsi_altmode_next_mode(con->partner_altmode,
- desc->svid);
+ ret = ucsi_altmode_next_mode(con->partner_altmode, desc->svid);
+ if (ret < 0)
+ return ret;
+
+ desc->mode = ret;
alt = typec_partner_register_altmode(con->partner, desc);
if (IS_ERR(alt)) {
--
2.28.0