Commit 2f217d58a8a0 ("perf/x86/amd/uncore: Set the thread mask for
F17h L3 PMCs") inadvertently changed the uncore driver's behaviour
wrt perf tool invocations with or without a CPU list, specified with
-C / --cpu=.
Change the behaviour of the driver to assume the former all-cpu (-a)
case, which is the more commonly desired default. This fixes
'-a -A' invocations without explicit cpu lists (-C) to not count
L3 events only on behalf of the first thread of the first core
in the L3 domain.
BEFORE:
Activity performed by the first thread of the last core (CPU#43) in
CPU#40's L3 domain is not reported by CPU#40:
sudo perf stat -a -A -e l3_request_g1.caching_l3_cache_accesses taskset -c 43 perf bench mem memcpy -s 32mb -l 100 -f default
...
CPU36 21,835 l3_request_g1.caching_l3_cache_accesses
CPU40 87,066 l3_request_g1.caching_l3_cache_accesses
CPU44 17,360 l3_request_g1.caching_l3_cache_accesses
...
AFTER:
The L3 domain activity is now reported by CPU#40:
sudo perf stat -a -A -e l3_request_g1.caching_l3_cache_accesses taskset -c 43 perf bench mem memcpy -s 32mb -l 100 -f default
...
CPU36 354,891 l3_request_g1.caching_l3_cache_accesses
CPU40 1,780,870 l3_request_g1.caching_l3_cache_accesses
CPU44 315,062 l3_request_g1.caching_l3_cache_accesses
...
Reported-by: Stephane Eranian <eranian(a)google.com>
Signed-off-by: Kim Phillips <kim.phillips(a)amd.com>
Fixes: 2f217d58a8a0 ("perf/x86/amd/uncore: Set the thread mask for F17h L3 PMCs")
Cc: Stephane Eranian <eranian(a)google.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme(a)kernel.org>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Jiri Olsa <jolsa(a)redhat.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Michael Petlan <mpetlan(a)redhat.com>
Cc: Namhyung Kim <namhyung(a)kernel.org>
Cc: LKML <linux-kernel(a)vger.kernel.org>
Cc: x86 <x86(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
Original submission:
https://lore.kernel.org/lkml/20200323233159.19601-1-kim.phillips@amd.com/
arch/x86/events/amd/uncore.c | 28 ++++++++--------------------
1 file changed, 8 insertions(+), 20 deletions(-)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 76400c052b0e..e7e61c8b56bd 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -181,28 +181,16 @@ static void amd_uncore_del(struct perf_event *event, int flags)
}
/*
- * Convert logical CPU number to L3 PMC Config ThreadMask format
+ * Return a full thread and slice mask until per-CPU is
+ * properly supported.
*/
-static u64 l3_thread_slice_mask(int cpu)
+static u64 l3_thread_slice_mask(void)
{
- u64 thread_mask, core = topology_core_id(cpu);
- unsigned int shift, thread = 0;
+ if (boot_cpu_data.x86 <= 0x18)
+ return AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK;
- if (topology_smt_supported() && !topology_is_primary_thread(cpu))
- thread = 1;
-
- if (boot_cpu_data.x86 <= 0x18) {
- shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
- thread_mask = BIT_ULL(shift);
-
- return AMD64_L3_SLICE_MASK | thread_mask;
- }
-
- core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
- shift = AMD64_L3_THREAD_SHIFT + thread;
- thread_mask = BIT_ULL(shift);
-
- return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
+ return AMD64_L3_EN_ALL_SLICES | AMD64_L3_EN_ALL_CORES |
+ AMD64_L3_F19H_THREAD_MASK;
}
static int amd_uncore_event_init(struct perf_event *event)
@@ -232,7 +220,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* For other events, the two fields do not affect the count.
*/
if (l3_mask && is_llc_event(event))
- hwc->config |= l3_thread_slice_mask(event->cpu);
+ hwc->config |= l3_thread_slice_mask();
uncore = event_to_amd_uncore(event);
if (!uncore)
--
2.27.0
Not entirely sure why this never came up when I originally tested this
(maybe some BIOSes already have this setup?) but the ->caps_init vfunc
appears to cause the display engine to throw an exception on driver
init, at least on my ThinkPad P72:
nouveau 0000:01:00.0: disp: chid 0 mthd 008c data 00000000 0000508c 0000102b
This is magic nvidia speak for "You need to have the DMA notifier offset
programmed before you can call NV507D_GET_CAPABILITIES." So, let's fix
this by doing that.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Fixes: 4a2cb4181b07 ("drm/nouveau/kms/nv50-: Probe SOR and PIOR caps for DP interlacing support")
Cc: <stable(a)vger.kernel.org> # v5.8+
---
drivers/gpu/drm/nouveau/dispnv50/core507d.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core507d.c b/drivers/gpu/drm/nouveau/dispnv50/core507d.c
index ad1f09a143aa4..c984080ce99f2 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core507d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/core507d.c
@@ -80,11 +80,19 @@ core507d_caps_init(struct nouveau_drm *drm, struct nv50_disp *disp)
struct nvif_push *push = disp->core->chan.push;
int ret;
- if ((ret = PUSH_WAIT(push, 2)))
+ if ((ret = PUSH_WAIT(push, 4)))
return ret;
+ PUSH_MTHD(push, NV507D, SET_NOTIFIER_CONTROL,
+ NVDEF(NV507D, SET_NOTIFIER_CONTROL, MODE, WRITE) |
+ NVVAL(NV507D, SET_NOTIFIER_CONTROL, OFFSET, NV50_DISP_CORE_NTFY >> 2) |
+ NVDEF(NV507D, SET_NOTIFIER_CONTROL, NOTIFY, ENABLE));
PUSH_MTHD(push, NV507D, GET_CAPABILITIES, 0x00000000);
- return PUSH_KICK(push);
+ ret = PUSH_KICK(push);
+ if (ret)
+ return ret;
+
+ return 0;
}
int
--
2.26.2
ext4_search_dir() and ext4_generic_delete_entry() can be called both for
standard director blocks and for inline directories stored inside inode
or inline xattr space. For the second case we didn't call
ext4_check_dir_entry() with proper constraints that could result in
accepting corrupted directory entry as well as false positive filesystem
errors like:
EXT4-fs error (device dm-0): ext4_search_dir:1395: inode #28320400:
block 113246792: comm dockerd: bad entry in directory: directory entry too
close to block end - offset=0, inode=28320403, rec_len=32, name_len=8,
size=4096
Fix the arguments passed to ext4_check_dir_entry().
Fixes: 109ba779d6cc ("ext4: check for directory entries too close to block end")
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/ext4/namei.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 56738b538ddf..98b91f2314eb 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1396,8 +1396,8 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
- bh->b_size, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
return -1;
*res_dir = de;
return 1;
@@ -2472,7 +2472,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
--
2.16.4
SCHED_RESTART code path is relied to re-run queue for dispatch requests
in hctx->dispatch. Meantime the SCHED_RSTART flag is checked when adding
requests to hctx->dispatch.
memory barriers have to be used for ordering the following two pair of OPs:
1) adding requests to hctx->dispatch and checking SCHED_RESTART in
blk_mq_dispatch_rq_list()
2) clearing SCHED_RESTART and checking if there is request in hctx->dispatch
in blk_mq_sched_restart().
Without the added memory barrier, either:
1) blk_mq_sched_restart() may miss requests added to hctx->dispatch meantime
blk_mq_dispatch_rq_list() observes SCHED_RESTART, and not run queue in
dispatch side
or
2) blk_mq_dispatch_rq_list still sees SCHED_RESTART, and not run queue
in dispatch side, meantime checking if there is request in
hctx->dispatch from blk_mq_sched_restart() is missed.
IO hang in ltp/fs_fill test is reported by kernel test robot:
https://lkml.org/lkml/2020/7/26/77
Turns out it is caused by the above out-of-order OPs. And the IO hang
can't be observed any more after applying this patch.
Cc: Bart Van Assche <bvanassche(a)acm.org>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: David Jeffery <djeffery(a)redhat.com>
Reported-by: kernel test robot <rong.a.chen(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ming Lei <ming.lei(a)redhat.com>
---
block/blk-mq-sched.c | 9 +++++++++
block/blk-mq.c | 9 +++++++++
2 files changed, 18 insertions(+)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a19cdf159b75..d2790e5b06d1 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -78,6 +78,15 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
return;
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+ /*
+ * Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
+ * in blk_mq_run_hw_queue(). Its pair is the barrier in
+ * blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART,
+ * meantime new request added to hctx->dispatch is missed to check in
+ * blk_mq_run_hw_queue().
+ */
+ smp_mb();
+
blk_mq_run_hw_queue(hctx, true);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0015a1892153..6c1c3ad175a9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1437,6 +1437,15 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
list_splice_tail_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
+ /*
+ * Order adding requests to hctx->dispatch and checking
+ * SCHED_RESTART flag. The pair of this smp_mb() is the one
+ * in blk_mq_sched_restart(). Avoid restart code path to
+ * miss the new added requests to hctx->dispatch, meantime
+ * SCHED_RESTART is observed here.
+ */
+ smp_mb();
+
/*
* If SCHED_RESTART was set by the caller of this function and
* it is no longer set that means that it was cleared by another
--
2.25.2
Before this commit i2c_hid_parse() consists of the following steps:
1. Send power on cmd
2. usleep_range(1000, 5000)
3. Send reset cmd
4. Wait for reset to complete (device interrupt, or msleep(100))
5. Send power on cmd
6. Try to read HID descriptor
Notice how there is an usleep_range(1000, 5000) after the first power-on
command, but not after the second power-on command.
Testing has shown that at least on the BMAX Y13 laptop's i2c-hid touchpad,
not having a delay after the second power-on command causes the HID
descriptor to read as all zeros.
In case we hit this on other devices too, the descriptor being all zeros
can be recognized by the following message being logged many, many times:
hid-generic 0018:0911:5288.0002: unknown main item tag 0x0
At the same time as the BMAX Y13's touchpad issue was debugged,
Kai-Heng was working on debugging some issues with Goodix i2c-hid
touchpads. It turns out that these need a delay after a PWR_ON command
too, otherwise they stop working after a suspend/resume cycle.
According to Goodix a delay of minimal 60ms is needed.
Having multiple cases where we need a delay after sending the power-on
command, seems to indicate that we should always sleep after the power-on
command.
This commit fixes the mentioned issues by moving the existing 1ms sleep to
the i2c_hid_set_power() function and changing it to a 60ms sleep.
Cc: stable(a)vger.kernel.org
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208247
Reported-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Reported-and-tested-by: Andrea Borgia <andrea(a)borgia.bo.it>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
- Add Kai-Heng's case, with Goodix touchpads needing a delay after PWR_ON too,
to the commit message
- Add a Reported-by tag for Kai-Heng
- Increase the delay to 60ms
---
drivers/hid/i2c-hid/i2c-hid-core.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 294c84e136d7..dbd04492825d 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -420,6 +420,19 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state)
dev_err(&client->dev, "failed to change power setting.\n");
set_pwr_exit:
+
+ /*
+ * The HID over I2C specification states that if a DEVICE needs time
+ * after the PWR_ON request, it should utilise CLOCK stretching.
+ * However, it has been observered that the Windows driver provides a
+ * 1ms sleep between the PWR_ON and RESET requests.
+ * According to Goodix Windows even waits 60 ms after (other?)
+ * PWR_ON requests. Testing has confirmed that several devices
+ * will not work properly without a delay after a PWR_ON request.
+ */
+ if (!ret && power_state == I2C_HID_PWR_ON)
+ msleep(60);
+
return ret;
}
@@ -441,15 +454,6 @@ static int i2c_hid_hwreset(struct i2c_client *client)
if (ret)
goto out_unlock;
- /*
- * The HID over I2C specification states that if a DEVICE needs time
- * after the PWR_ON request, it should utilise CLOCK stretching.
- * However, it has been observered that the Windows driver provides a
- * 1ms sleep between the PWR_ON and RESET requests and that some devices
- * rely on this.
- */
- usleep_range(1000, 5000);
-
i2c_hid_dbg(ihid, "resetting...\n");
ret = i2c_hid_command(client, &hid_reset_cmd, NULL, 0);
--
2.28.0
From: Sean Paul <seanpaul(a)chromium.org>
This patch fixes a few bugs:
1- We weren't taking into account sha_leftovers when adding multiple
ksvs to sha_text. As such, we were or'ing the end of ksv[j - 1] with
the beginning of ksv[j]
2- In the sha_leftovers == 2 and sha_leftovers == 3 case, bstatus was
being placed on the wrong half of sha_text, overlapping the leftover
ksv value
3- In the sha_leftovers == 2 case, we need to manually terminate the
byte stream with 0x80 since the hardware doesn't have enough room to
add it after writing M0
The upside is that all of the HDCP supported HDMI repeaters I could
find on Amazon just strip HDCP anyways, so it turns out to be _really_
hard to hit any of these cases without an MST hub, which is not (yet)
supported. Oh, and the sha_leftovers == 1 case works perfectly!
Fixes: ee5e5e7a5e0f (drm/i915: Add HDCP framework + base implementation)
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Ramalingam C <ramalingam.c(a)intel.com>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Sean Paul <seanpaul(a)chromium.org>
Cc: Jani Nikula <jani.nikula(a)linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: intel-gfx(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v4.17+
Reviewed-by: Ramalingam C <ramalingam.c(a)intel.com>
Signed-off-by: Sean Paul <seanpaul(a)chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20191203173638.94919-2-sean@p… #v1
Link: https://patchwork.freedesktop.org/patch/msgid/20191212190230.188505-2-sean@… #v2
Link: https://patchwork.freedesktop.org/patch/msgid/20200117193103.156821-2-sean@… #v3
Link: https://patchwork.freedesktop.org/patch/msgid/20200218220242.107265-2-sean@… #v4
Link: https://patchwork.freedesktop.org/patch/msgid/20200305201236.152307-2-sean@… #v5
Link: https://patchwork.freedesktop.org/patch/msgid/20200429195502.39919-2-sean@p… #v6
Link: https://patchwork.freedesktop.org/patch/msgid/20200623155907.22961-2-sean@p… #v7
Changes in v2:
-None
Changes in v3:
-None
Changes in v4:
-Rebased on intel_de_write changes
Changes in v5:
-None
Changes in v6:
-None
Changes in v7:
-None
Changes in v8:
-None
---
drivers/gpu/drm/i915/display/intel_hdcp.c | 26 +++++++++++++++++------
include/drm/drm_hdcp.h | 3 +++
2 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 89a4d294822d..6189b7583277 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -336,8 +336,10 @@ int intel_hdcp_validate_v_prime(struct intel_connector *connector,
/* Fill up the empty slots in sha_text and write it out */
sha_empty = sizeof(sha_text) - sha_leftovers;
- for (j = 0; j < sha_empty; j++)
- sha_text |= ksv[j] << ((sizeof(sha_text) - j - 1) * 8);
+ for (j = 0; j < sha_empty; j++) {
+ u8 off = ((sizeof(sha_text) - j - 1 - sha_leftovers) * 8);
+ sha_text |= ksv[j] << off;
+ }
ret = intel_write_sha_text(dev_priv, sha_text);
if (ret < 0)
@@ -435,7 +437,7 @@ int intel_hdcp_validate_v_prime(struct intel_connector *connector,
/* Write 32 bits of text */
intel_de_write(dev_priv, HDCP_REP_CTL,
rep_ctl | HDCP_SHA1_TEXT_32);
- sha_text |= bstatus[0] << 24 | bstatus[1] << 16;
+ sha_text |= bstatus[0] << 8 | bstatus[1];
ret = intel_write_sha_text(dev_priv, sha_text);
if (ret < 0)
return ret;
@@ -450,17 +452,29 @@ int intel_hdcp_validate_v_prime(struct intel_connector *connector,
return ret;
sha_idx += sizeof(sha_text);
}
+
+ /*
+ * Terminate the SHA-1 stream by hand. For the other leftover
+ * cases this is appended by the hardware.
+ */
+ intel_de_write(dev_priv, HDCP_REP_CTL,
+ rep_ctl | HDCP_SHA1_TEXT_32);
+ sha_text = DRM_HDCP_SHA1_TERMINATOR << 24;
+ ret = intel_write_sha_text(dev_priv, sha_text);
+ if (ret < 0)
+ return ret;
+ sha_idx += sizeof(sha_text);
} else if (sha_leftovers == 3) {
- /* Write 32 bits of text */
+ /* Write 32 bits of text (filled from LSB) */
intel_de_write(dev_priv, HDCP_REP_CTL,
rep_ctl | HDCP_SHA1_TEXT_32);
- sha_text |= bstatus[0] << 24;
+ sha_text |= bstatus[0];
ret = intel_write_sha_text(dev_priv, sha_text);
if (ret < 0)
return ret;
sha_idx += sizeof(sha_text);
- /* Write 8 bits of text, 24 bits of M0 */
+ /* Write 8 bits of text (filled from LSB), 24 bits of M0 */
intel_de_write(dev_priv, HDCP_REP_CTL,
rep_ctl | HDCP_SHA1_TEXT_8);
ret = intel_write_sha_text(dev_priv, bstatus[1]);
diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h
index c6bab4986a65..fe58dbb46962 100644
--- a/include/drm/drm_hdcp.h
+++ b/include/drm/drm_hdcp.h
@@ -29,6 +29,9 @@
/* Slave address for the HDCP registers in the receiver */
#define DRM_HDCP_DDC_ADDR 0x3A
+/* Value to use at the end of the SHA-1 bytestream used for repeaters */
+#define DRM_HDCP_SHA1_TERMINATOR 0x80
+
/* HDCP register offsets for HDMI/DVI devices */
#define DRM_HDCP_DDC_BKSV 0x00
#define DRM_HDCP_DDC_RI_PRIME 0x08
--
Sean Paul, Software Engineer, Google / Chromium OS