The following commit has been merged into the perf/core branch of tip:
Commit-ID: ad97196379d0b8cb24ef3d5006978a6554e6467f
Gitweb: https://git.kernel.org/tip/ad97196379d0b8cb24ef3d5006978a6554e6467f
Author: Adrian Hunter <adrian.hunter(a)intel.com>
AuthorDate: Mon, 24 Jun 2024 23:10:56 +03:00
Committer: Peter Zijlstra <peterz(a)infradead.org>
CommitterDate: Thu, 04 Jul 2024 16:00:21 +02:00
perf/x86/intel/pt: Fix a topa_entry base address calculation
topa_entry->base is a bit-field. Bit-fields are not promoted to a 64-bit
type, even if the underlying type is 64-bit, and so, if necessary, must
be cast to a larger type when calculations are done.
Fix a topa_entry->base address calculation by adding a cast. Without
the cast, the address was limited to 36 bits, i.e. 64 GiB.
The address calculation is used on systems that do not support Multiple
Entry ToPA (only Broadwell), and affects physical addresses at or above
64 GiB. Instead of writing to the correct address, writes would go to the
address formed by the low 36 bits.
Intel PT snapshot and sampling modes are not affected.
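For illustration, the defensive pattern is to widen the bit-field value
explicitly before shifting, so the computation cannot be carried out in a
type narrower than the final physical address. A minimal stand-alone sketch
(the field layout, names and shift below are illustrative, not the kernel's
actual definitions):

	#include <stdint.h>

	typedef uint64_t phys_addr_t;		/* stand-in for the kernel type */

	struct topa_entry_like {
		uint64_t end	: 1;
		uint64_t rsvd	: 11;
		uint64_t base	: 36;		/* only 36 bits of address */
	};

	#define TOPA_SHIFT_LIKE	12		/* illustrative page shift */

	static inline phys_addr_t topa_base_address(const struct topa_entry_like *e)
	{
		/* Cast before the shift so no address bits can be lost. */
		return (phys_addr_t)e->base << TOPA_SHIFT_LIKE;
	}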
Fixes: 52ca9ced3f70 ("perf/x86/intel/pt: Add Intel PT PMU driver")
Reported-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter(a)intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240624201101.60186-3-adrian.hunter@intel.com
---
arch/x86/events/intel/pt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 14db6d9..047a2cd 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -878,7 +878,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
- return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
+ return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}
/**
The patch titled
Subject: mm-fix-khugepaged-activation-policy-v3
has been added to the -mm mm-unstable branch. Its filename is
mm-fix-khugepaged-activation-policy-v3.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm-fix-khugepaged-activation-policy-v3
Date: Fri, 5 Jul 2024 11:28:48 +0100
- Make hugepage_pmd_enabled() out-of-line static in khugepaged.c (per Andrew)
- Refactor hugepage_pmd_enabled() for better readability (per Andrew)
Link: https://lkml.kernel.org/r/20240705102849.2479686-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Fixes: 3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
Closes: https://lore.kernel.org/linux-mm/7a0bbe69-1e3d-4263-b206-da007791a5c4@redha…
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Lance Yang <ioworker0(a)gmail.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/huge_mm.h | 13 -------------
mm/khugepaged.c | 20 ++++++++++++++++++++
2 files changed, 20 insertions(+), 13 deletions(-)
--- a/include/linux/huge_mm.h~mm-fix-khugepaged-activation-policy-v3
+++ a/include/linux/huge_mm.h
@@ -128,19 +128,6 @@ static inline bool hugepage_global_alway
(1<<TRANSPARENT_HUGEPAGE_FLAG);
}
-static inline bool hugepage_pmd_enabled(void)
-{
- /*
- * We cover both the anon and the file-backed case here; file-backed
- * hugepages, when configured in, are determined by the global control.
- * Anon pmd-sized hugepages are determined by the pmd-size control.
- */
- return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && hugepage_global_enabled()) ||
- test_bit(PMD_ORDER, &huge_anon_orders_always) ||
- test_bit(PMD_ORDER, &huge_anon_orders_madvise) ||
- (test_bit(PMD_ORDER, &huge_anon_orders_inherit) && hugepage_global_enabled());
-}
-
static inline int highest_order(unsigned long orders)
{
return fls_long(orders) - 1;
--- a/mm/khugepaged.c~mm-fix-khugepaged-activation-policy-v3
+++ a/mm/khugepaged.c
@@ -413,6 +413,26 @@ static inline int hpage_collapse_test_ex
test_bit(MMF_DISABLE_THP, &mm->flags);
}
+static bool hugepage_pmd_enabled(void)
+{
+ /*
+ * We cover both the anon and the file-backed case here; file-backed
+ * hugepages, when configured in, are determined by the global control.
+ * Anon pmd-sized hugepages are determined by the pmd-size control.
+ */
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ hugepage_global_enabled())
+ return true;
+ if (test_bit(PMD_ORDER, &huge_anon_orders_always))
+ return true;
+ if (test_bit(PMD_ORDER, &huge_anon_orders_madvise))
+ return true;
+ if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
+ hugepage_global_enabled())
+ return true;
+ return false;
+}
+
void __khugepaged_enter(struct mm_struct *mm)
{
struct khugepaged_mm_slot *mm_slot;
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-fix-khugepaged-activation-policy.patch
mm-fix-khugepaged-activation-policy-v3.patch
These igc bug fixes are sent as a patch series because:
1. The patch "igc: Remove unused qbv_count" has dependency on:
"igc: Fix qbv_config_change_errors logics"
"igc: Fix reset adapter logics when tx mode change"
These two patches remove the reliance on using the qbv_count field.
2. The patch "igc: Fix qbv tx latency by setting gtxoffset" reuse the
function igc_tsn_will_tx_mode_change() created in the patch:
"igc: Fix reset adapter logics when tx mode change"
Faizal Rahim (4):
igc: Fix qbv_config_change_errors logics
igc: Fix reset adapter logics when tx mode change
igc: Remove unused qbv_count
igc: Fix qbv tx latency by setting gtxoffset
drivers/net/ethernet/intel/igc/igc.h | 1 -
drivers/net/ethernet/intel/igc/igc_main.c | 9 +++--
drivers/net/ethernet/intel/igc/igc_tsn.c | 49 ++++++++++++++++-------
drivers/net/ethernet/intel/igc/igc_tsn.h | 1 +
4 files changed, 42 insertions(+), 18 deletions(-)
--
2.25.1
Switching to transparent mode leads to a loss of link synchronization,
so prevent doing this on an active link. This happened at least on an
Intel N100 system / DELL UD22 dock, with the LTTPR residing either on
the host or in the dock. To fix the issue, keep the current mode on an active
link, adjusting the LTTPR count accordingly (resetting it to 0 in
transparent mode).
Fixes: 7b2a4ab8b0ef ("drm/i915: Switch to LTTPR transparent mode link training")
Reported-and-tested-by: Gareth Yu <gareth.yu(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10902
Cc: <stable(a)vger.kernel.org> # v5.15+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
.../drm/i915/display/intel_dp_link_training.c | 49 +++++++++++++++++--
1 file changed, 45 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 1bc4ef84ff3bc..08a27fe077917 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -117,10 +117,24 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable)
return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1;
}
-static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp)
+{
+ return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE -
+ DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] ==
+ DP_PHY_REPEATER_MODE_TRANSPARENT;
+}
+
+/*
+ * Read the LTTPR common capabilities and switch the LTTPR PHYs to
+ * non-transparent mode if this is supported. Preserve the
+ * transparent/non-transparent mode on an active link.
+ *
+ * Return the number of detected LTTPRs in non-transparent mode or 0 if the
+ * LTTPRs are in transparent mode or the detection failed.
+ */
+static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
{
int lttpr_count;
- int i;
if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd))
return 0;
@@ -134,6 +148,19 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI
if (lttpr_count == 0)
return 0;
+ /*
+ * Don't change the mode on an active link, to prevent a loss of link
+ * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR
+ * resetting its internal state when the mode is changed from
+ * non-transparent to transparent.
+ */
+ if (intel_dp->link_trained) {
+ if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp))
+ goto out_reset_lttpr_count;
+
+ return lttpr_count;
+ }
+
/*
* See DP Standard v2.0 3.6.6.1. about the explicit disabling of
* non-transparent mode and the disable->enable non-transparent mode
@@ -154,11 +181,25 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI
"Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n");
intel_dp_set_lttpr_transparent_mode(intel_dp, true);
- intel_dp_reset_lttpr_count(intel_dp);
- return 0;
+ goto out_reset_lttpr_count;
}
+ return lttpr_count;
+
+out_reset_lttpr_count:
+ intel_dp_reset_lttpr_count(intel_dp);
+
+ return 0;
+}
+
+static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+ int lttpr_count;
+ int i;
+
+ lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd);
+
for (i = 0; i < lttpr_count; i++)
intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i));
--
2.43.3
Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can
cause a NULL dereference in tpm_buf_hmac_session*(). Thus, address
!chip->auth in tpm_buf_hmac_session*() and remove the fallback
implementation for !TCG_TPM2_HMAC.
Cc: stable(a)vger.kernel.org # v6.9+
Reported-by: Stefan Berger <stefanb(a)linux.ibm.com>
Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@li…
Fixes: 1085b8276bb4 ("tpm: Add the rest of the session HMAC API")
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
v2:
- Use auth in place of chip->auth.
---
drivers/char/tpm/tpm2-sessions.c | 181 ++++++++++++++++++-------------
include/linux/tpm.h | 67 ++++--------
2 files changed, 124 insertions(+), 124 deletions(-)
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 06d0f10a2301..304247090b56 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -268,6 +268,105 @@ void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
}
EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+/**
+ * tpm_buf_append_hmac_session() - Append a TPM session element
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @attributes: The session attributes
+ * @passphrase: The session authority (NULL if none)
+ * @passphrase_len: The length of the session authority (0 if none)
+ *
+ * This fills in a session structure in the TPM command buffer, except
+ * for the HMAC which cannot be computed until the command buffer is
+ * complete. The type of session is controlled by the @attributes,
+ * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
+ * session won't terminate after tpm_buf_check_hmac_response(),
+ * TPM2_SA_DECRYPT which means this buffers first parameter should be
+ * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
+ * response buffer's first parameter needs to be decrypted (confusing,
+ * but the defines are written from the point of view of the TPM).
+ *
+ * Any session appended by this command must be finalized by calling
+ * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
+ * and the TPM will reject the command.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase,
+ int passphrase_len)
+{
+ struct tpm2_auth *auth = chip->auth;
+ u8 nonce[SHA256_DIGEST_SIZE];
+ u32 len;
+
+ if (!auth) {
+ /* offset tells us where the sessions area begins */
+ int offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ u32 len = 9 + passphrase_len;
+
+ if (tpm_buf_length(buf) != offset) {
+ /* not the first session so update the existing length */
+ len += get_unaligned_be32(&buf->data[offset]);
+ put_unaligned_be32(len, &buf->data[offset]);
+ } else {
+ tpm_buf_append_u32(buf, len);
+ }
+ /* auth handle */
+ tpm_buf_append_u32(buf, TPM2_RS_PW);
+ /* nonce */
+ tpm_buf_append_u16(buf, 0);
+ /* attributes */
+ tpm_buf_append_u8(buf, 0);
+ /* passphrase */
+ tpm_buf_append_u16(buf, passphrase_len);
+ tpm_buf_append(buf, passphrase, passphrase_len);
+ return;
+ }
+
+ /*
+ * The Architecture Guide requires us to strip trailing zeros
+ * before computing the HMAC
+ */
+ while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0')
+ passphrase_len--;
+
+ auth->attrs = attributes;
+ auth->passphrase_len = passphrase_len;
+ if (passphrase_len)
+ memcpy(auth->passphrase, passphrase, passphrase_len);
+
+ if (auth->session != tpm_buf_length(buf)) {
+ /* we're not the first session */
+ len = get_unaligned_be32(&buf->data[auth->session]);
+ if (4 + len + auth->session != tpm_buf_length(buf)) {
+ WARN(1, "session length mismatch, cannot append");
+ return;
+ }
+
+ /* add our new session */
+ len += 9 + 2 * SHA256_DIGEST_SIZE;
+ put_unaligned_be32(len, &buf->data[auth->session]);
+ } else {
+ tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
+ }
+
+ /* random number for our nonce */
+ get_random_bytes(nonce, sizeof(nonce));
+ memcpy(auth->our_nonce, nonce, sizeof(nonce));
+ tpm_buf_append_u32(buf, auth->handle);
+ /* our new nonce */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+ tpm_buf_append_u8(buf, auth->attrs);
+ /* and put a placeholder for the hmac */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session);
+
#ifdef CONFIG_TCG_TPM2_HMAC
/*
* It turns out the crypto hmac(sha256) is hard for us to consume
@@ -449,82 +548,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
crypto_free_kpp(kpp);
}
-/**
- * tpm_buf_append_hmac_session() - Append a TPM session element
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @attributes: The session attributes
- * @passphrase: The session authority (NULL if none)
- * @passphrase_len: The length of the session authority (0 if none)
- *
- * This fills in a session structure in the TPM command buffer, except
- * for the HMAC which cannot be computed until the command buffer is
- * complete. The type of session is controlled by the @attributes,
- * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
- * session won't terminate after tpm_buf_check_hmac_response(),
- * TPM2_SA_DECRYPT which means this buffers first parameter should be
- * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
- * response buffer's first parameter needs to be decrypted (confusing,
- * but the defines are written from the point of view of the TPM).
- *
- * Any session appended by this command must be finalized by calling
- * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
- * and the TPM will reject the command.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphrase_len)
-{
- u8 nonce[SHA256_DIGEST_SIZE];
- u32 len;
- struct tpm2_auth *auth = chip->auth;
-
- /*
- * The Architecture Guide requires us to strip trailing zeros
- * before computing the HMAC
- */
- while (passphrase && passphrase_len > 0
- && passphrase[passphrase_len - 1] == '\0')
- passphrase_len--;
-
- auth->attrs = attributes;
- auth->passphrase_len = passphrase_len;
- if (passphrase_len)
- memcpy(auth->passphrase, passphrase, passphrase_len);
-
- if (auth->session != tpm_buf_length(buf)) {
- /* we're not the first session */
- len = get_unaligned_be32(&buf->data[auth->session]);
- if (4 + len + auth->session != tpm_buf_length(buf)) {
- WARN(1, "session length mismatch, cannot append");
- return;
- }
-
- /* add our new session */
- len += 9 + 2 * SHA256_DIGEST_SIZE;
- put_unaligned_be32(len, &buf->data[auth->session]);
- } else {
- tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
- }
-
- /* random number for our nonce */
- get_random_bytes(nonce, sizeof(nonce));
- memcpy(auth->our_nonce, nonce, sizeof(nonce));
- tpm_buf_append_u32(buf, auth->handle);
- /* our new nonce */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
- tpm_buf_append_u8(buf, auth->attrs);
- /* and put a placeholder for the hmac */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
-}
-EXPORT_SYMBOL(tpm_buf_append_hmac_session);
-
/**
* tpm_buf_fill_hmac_session() - finalize the session HMAC
* @chip: the TPM chip structure
@@ -555,6 +578,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
u8 cphash[SHA256_DIGEST_SIZE];
struct sha256_state sctx;
+ if (!auth)
+ return;
+
/* save the command code in BE format */
auth->ordinal = head->ordinal;
@@ -713,6 +739,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
u32 cc = be32_to_cpu(auth->ordinal);
int parm_len, len, i, handles;
+ if (!auth)
+ return rc;
+
if (auth->session >= TPM_HEADER_SIZE) {
WARN(1, "tpm session not filled correctly\n");
goto out;
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 2844fea4a12a..912fd0d2646d 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -493,22 +493,35 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle)
void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
u32 handle, u8 *name);
-
-#ifdef CONFIG_TCG_TPM2_HMAC
-
-int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
int passphraselen);
+
static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
struct tpm_buf *buf,
u8 attributes,
u8 *passphrase,
int passphraselen)
{
- tpm_buf_append_hmac_session(chip, buf, attributes, passphrase,
- passphraselen);
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ int offset = buf->handles * 4 + TPM_HEADER_SIZE;
+
+ if (chip->auth) {
+ tpm_buf_append_hmac_session(chip, buf, attributes, passphrase,
+ passphraselen);
+ } else {
+ /*
+ * If the only sessions are optional, the command tag must change to
+ * TPM2_ST_NO_SESSIONS.
+ */
+ if (tpm_buf_length(buf) == offset)
+ head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
+ }
}
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
int rc);
@@ -523,48 +536,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
static inline void tpm2_end_auth_session(struct tpm_chip *chip)
{
}
-static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphraselen)
-{
- /* offset tells us where the sessions area begins */
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- u32 len = 9 + passphraselen;
-
- if (tpm_buf_length(buf) != offset) {
- /* not the first session so update the existing length */
- len += get_unaligned_be32(&buf->data[offset]);
- put_unaligned_be32(len, &buf->data[offset]);
- } else {
- tpm_buf_append_u32(buf, len);
- }
- /* auth handle */
- tpm_buf_append_u32(buf, TPM2_RS_PW);
- /* nonce */
- tpm_buf_append_u16(buf, 0);
- /* attributes */
- tpm_buf_append_u8(buf, 0);
- /* passphrase */
- tpm_buf_append_u16(buf, passphraselen);
- tpm_buf_append(buf, passphrase, passphraselen);
-}
-static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes,
- u8 *passphrase,
- int passphraselen)
-{
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- struct tpm_header *head = (struct tpm_header *) buf->data;
-
- /*
- * if the only sessions are optional, the command tag
- * must change to TPM2_ST_NO_SESSIONS
- */
- if (tpm_buf_length(buf) == offset)
- head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
-}
static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip,
struct tpm_buf *buf)
{
--
2.45.2
The caching mode for buffer objects with VRAM as a possible
placement was forced to write-combined, regardless of placement.
However, write-combined system memory is expensive to allocate and
even though it is pooled, the pool is expensive to shrink, since
it involves global CPU TLB flushes.
Moreover, write-combined system memory from TTM is only reliably
available on x86, and DGFX is not restricted to x86.
So regardless of the cpu caching mode selected for a bo,
internally use write-back caching mode for system memory on DGFX.
Coherency is maintained, but user-space clients may perceive a
difference in cpu access speeds.
v2:
- Update RB- and Ack tags.
- Rephrase wording in xe_drm.h (Matt Roper)
v3:
- Really rephrase wording.
Signed-off-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra <pallavi.mishra(a)intel.com>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Effie Yu <effie.yu(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Jose Souza <jose.souza(a)intel.com>
Cc: Michal Mrozek <michal.mrozek(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
Acked-by: Matthew Auld <matthew.auld(a)intel.com>
Acked-by: José Roberto de Souza <jose.souza(a)intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Acked-by: Michal Mrozek <michal.mrozek(a)intel.com>
Acked-by: Effie Yu <effie.yu(a)intel.com> #On chat
---
drivers/gpu/drm/xe/xe_bo.c | 47 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_bo_types.h | 3 +-
include/uapi/drm/xe_drm.h | 8 +++++-
3 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65c696966e96..31192d983d9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
- enum ttm_caching caching;
+ enum ttm_caching caching = ttm_cached;
int err;
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
PAGE_SIZE);
- switch (bo->cpu_caching) {
- case DRM_XE_GEM_CPU_CACHING_WC:
- caching = ttm_write_combined;
- break;
- default:
- caching = ttm_cached;
- break;
- }
-
- WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
-
/*
- * Display scanout is always non-coherent with the CPU cache.
- *
- * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
- * require a CPU:WC mapping.
+ * DGFX system memory is always WB / ttm_cached, since
+ * other caching modes are only supported on x86. DGFX
+ * GPU system memory accesses are always coherent with the
+ * CPU.
*/
- if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
- (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
- caching = ttm_write_combined;
+ if (!IS_DGFX(xe)) {
+ switch (bo->cpu_caching) {
+ case DRM_XE_GEM_CPU_CACHING_WC:
+ caching = ttm_write_combined;
+ break;
+ default:
+ caching = ttm_cached;
+ break;
+ }
+
+ WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+ /*
+ * Display scanout is always non-coherent with the CPU cache.
+ *
+ * For Xe_LPG and beyond, PPGTT PTE lookups are also
+ * non-coherent and require a CPU:WC mapping.
+ */
+ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+ (xe->info.graphics_verx100 >= 1270 &&
+ bo->flags & XE_BO_FLAG_PAGETABLE))
+ caching = ttm_write_combined;
+ }
if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 02d68873558a..ebc8abf7930a 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -68,7 +68,8 @@ struct xe_bo {
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
- * objects.
+ * objects. Exceptions are system memory on DGFX, which is always
+ * WB.
*/
u16 cpu_caching;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 33544ef78d3e..19619d4952a8 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -783,7 +783,13 @@ struct drm_xe_gem_create {
#define DRM_XE_GEM_CPU_CACHING_WC 2
/**
* @cpu_caching: The CPU caching mode to select for this object. If
- * mmaping the object the mode selected here will also be used.
+ * mmaping the object the mode selected here will also be used. The
+ * exception is when mapping system memory (including data evicted
+ * to system) on discrete GPUs. The caching mode selected will
+ * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency
+ * between GPU- and CPU is guaranteed. The caching mode of
+ * existing CPU-mappings will be updated transparently to
+ * user-space clients.
*/
__u16 cpu_caching;
/** @pad: MBZ */
--
2.44.0
From: Alain Volmat <alain.volmat(a)foss.st.com>
Correct the error handling within dcmipp_create_subdevs() by properly
decrementing the i counter when releasing the subdevs.
Fixes: 28e0f3772296 ("media: stm32-dcmipp: STM32 DCMIPP camera interface driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Alain Volmat <alain.volmat(a)foss.st.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
[hverkuil: correct the indices: it's [i], not [i - 1].]
---
The original patch would cause a crash due to the incorrect indices in the
statement after the while: since 'i' can now become 0, i - 1 would be a
negative index access, which was obviously not the intention.
I reverted the patch once I noticed this (better to hang in an infinite
loop than to crash), but I want to get a proper fix in. Rather than
waiting for that, I decided to just take the original patch from Alain, with
just the indices fixed.
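For illustration only, the general unwind idiom looks like the sketch below
(all names are hypothetical, not the dcmipp driver's API). The pre-fix loop
"while (i > 0)" without decrementing i never terminates; with the
post-decrement form the body must index [i], since i has already been
decremented, otherwise [i - 1] underflows once i reaches 0.

	struct ent { int initialized; };

	static int init_one(struct ent *e)     { e->initialized = 1; return 0; }
	static void release_one(struct ent *e) { e->initialized = 0; }

	static int init_all(struct ent *ents, int n)
	{
		int i, ret = 0;

		for (i = 0; i < n; i++) {
			ret = init_one(&ents[i]);
			if (ret)
				goto err_unwind;
		}
		return 0;

	err_unwind:
		/* Release exactly the entries 0..i-1 that were initialized. */
		while (i-- > 0)
			release_one(&ents[i]);
		return ret;
	}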
---
drivers/media/platform/st/stm32/stm32-dcmipp/dcmipp-core.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/media/platform/st/stm32/stm32-dcmipp/dcmipp-core.c b/drivers/media/platform/st/stm32/stm32-dcmipp/dcmipp-core.c
index 4acc3b90d03a..7f771ea49b78 100644
--- a/drivers/media/platform/st/stm32/stm32-dcmipp/dcmipp-core.c
+++ b/drivers/media/platform/st/stm32/stm32-dcmipp/dcmipp-core.c
@@ -202,8 +202,8 @@ static int dcmipp_create_subdevs(struct dcmipp_device *dcmipp)
return 0;
err_init_entity:
- while (i > 0)
- dcmipp->pipe_cfg->ents[i - 1].release(dcmipp->entity[i - 1]);
+ while (i-- > 0)
+ dcmipp->pipe_cfg->ents[i].release(dcmipp->entity[i]);
return ret;
}
--
2.43.0
Since the introduction of mTHP, the documentation has stated that
khugepaged would be enabled when any mTHP size is enabled, and disabled
when all mTHP sizes are disabled. There are 2 problems with this: 1)
this is not what the code actually implements, and 2) this is not the
desirable behavior.
Desirable behavior is for khugepaged to be enabled when any PMD-sized
THP is enabled, anon or file. (Note that file THP is still controlled by
the top-level control so we must always consider that, as well as the
PMD-size mTHP control for anon). khugepaged only supports collapsing to
PMD-sized THP so there is no value in enabling it when PMD-sized THP is
disabled. So let's change the code and documentation to reflect this
policy.
Further, per-size enabled control modification events were not
previously forwarded to khugepaged to give it an opportunity to start or
stop. Consequently, the following resulted in khugepaged erroneously
not being activated:
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo always > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Fixes: 3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
Closes: https://lore.kernel.org/linux-mm/7a0bbe69-1e3d-4263-b206-da007791a5c4@redha…
Cc: stable(a)vger.kernel.org
---
Hi All,
Applies on top of mm-unstable from a couple of days ago (9bb8753acdd8). No
regressions observed in mm selftests.
While fixing this I also noticed that khugepaged is not (and never has been)
activated/deactivated by `shmem_enabled=`. I've concluded that this is
definitely a (separate) bug. But I'm waiting for the conclusion of the
conversation at [2] before fixing, so will send separately.
Changes since v1 [1]
====================
- hugepage_pmd_enabled() now considers CONFIG_READ_ONLY_THP_FOR_FS as part of
the decision; this means that for kernels without this config, khugepaged will not be
started when only the top-level control is enabled.
[1] https://lore.kernel.org/linux-mm/20240702144617.2291480-1-ryan.roberts@arm.…
[2] https://lore.kernel.org/linux-mm/65c37315-2741-481f-b433-cec35ef1af35@arm.c…
Thanks,
Ryan
Documentation/admin-guide/mm/transhuge.rst | 11 +++++------
include/linux/huge_mm.h | 15 ++++++++-------
mm/huge_memory.c | 7 +++++++
mm/khugepaged.c | 13 ++++++-------
4 files changed, 26 insertions(+), 20 deletions(-)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 709fe10b60f4..fc321d40b8ac 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -202,12 +202,11 @@ PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
-khugepaged will be automatically started when one or more hugepage
-sizes are enabled (either by directly setting "always" or "madvise",
-or by setting "inherit" while the top-level enabled is set to "always"
-or "madvise"), and it'll be automatically shutdown when the last
-hugepage size is disabled (either by directly setting "never", or by
-setting "inherit" while the top-level enabled is set to "never").
+khugepaged will be automatically started when PMD-sized THP is enabled
+(either of the per-size anon control or the top-level control are set
+to "always" or "madvise"), and it'll be automatically shutdown when
+PMD-sized THP is disabled (when both the per-size anon control and the
+top-level control are "never")
Khugepaged controls
-------------------
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4d155c7a4792..6debd8b6fd7d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -128,16 +128,17 @@ static inline bool hugepage_global_always(void)
(1<<TRANSPARENT_HUGEPAGE_FLAG);
}
-static inline bool hugepage_flags_enabled(void)
+static inline bool hugepage_pmd_enabled(void)
{
/*
- * We cover both the anon and the file-backed case here; we must return
- * true if globally enabled, even when all anon sizes are set to never.
- * So we don't need to look at huge_anon_orders_inherit.
+ * We cover both the anon and the file-backed case here; file-backed
+ * hugepages, when configured in, are determined by the global control.
+ * Anon pmd-sized hugepages are determined by the pmd-size control.
*/
- return hugepage_global_enabled() ||
- READ_ONCE(huge_anon_orders_always) ||
- READ_ONCE(huge_anon_orders_madvise);
+ return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && hugepage_global_enabled()) ||
+ test_bit(PMD_ORDER, &huge_anon_orders_always) ||
+ test_bit(PMD_ORDER, &huge_anon_orders_madvise) ||
+ (test_bit(PMD_ORDER, &huge_anon_orders_inherit) && hugepage_global_enabled());
}
static inline int highest_order(unsigned long orders)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 251d6932130f..085f5e973231 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -502,6 +502,13 @@ static ssize_t thpsize_enabled_store(struct kobject *kobj,
} else
ret = -EINVAL;
+ if (ret > 0) {
+ int err;
+
+ err = start_stop_khugepaged();
+ if (err)
+ ret = err;
+ }
return ret;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 409f67a817f1..708d0e74b61f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -449,7 +449,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
unsigned long vm_flags)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
- hugepage_flags_enabled()) {
+ hugepage_pmd_enabled()) {
if (thp_vma_allowable_order(vma, vm_flags, TVA_ENFORCE_SYSFS,
PMD_ORDER))
__khugepaged_enter(vma->vm_mm);
@@ -2462,8 +2462,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
static int khugepaged_has_work(void)
{
- return !list_empty(&khugepaged_scan.mm_head) &&
- hugepage_flags_enabled();
+ return !list_empty(&khugepaged_scan.mm_head) && hugepage_pmd_enabled();
}
static int khugepaged_wait_event(void)
@@ -2536,7 +2535,7 @@ static void khugepaged_wait_work(void)
return;
}
- if (hugepage_flags_enabled())
+ if (hugepage_pmd_enabled())
wait_event_freezable(khugepaged_wait, khugepaged_wait_event());
}
@@ -2567,7 +2566,7 @@ static void set_recommended_min_free_kbytes(void)
int nr_zones = 0;
unsigned long recommended_min;
- if (!hugepage_flags_enabled()) {
+ if (!hugepage_pmd_enabled()) {
calculate_min_free_kbytes();
goto update_wmarks;
}
@@ -2617,7 +2616,7 @@ int start_stop_khugepaged(void)
int err = 0;
mutex_lock(&khugepaged_mutex);
- if (hugepage_flags_enabled()) {
+ if (hugepage_pmd_enabled()) {
if (!khugepaged_thread)
khugepaged_thread = kthread_run(khugepaged, NULL,
"khugepaged");
@@ -2643,7 +2642,7 @@ int start_stop_khugepaged(void)
void khugepaged_min_free_kbytes_update(void)
{
mutex_lock(&khugepaged_mutex);
- if (hugepage_flags_enabled() && khugepaged_thread)
+ if (hugepage_pmd_enabled() && khugepaged_thread)
set_recommended_min_free_kbytes();
mutex_unlock(&khugepaged_mutex);
}
--
2.43.0
From: yangge <yangge1116(a)126.com>
Since commit 5d0a661d808f ("mm/page_alloc: use only one PCP list for
THP-sized allocations"), the THP-sized PCP list no longer differentiates
the migration type of its pages, so non-movable allocation requests may
get a CMA page from the list, which in some cases is not acceptable.
If a large amount of CMA memory is configured in the system (for
example, CMA memory accounts for 50% of the system memory), starting a
virtual machine with device passthrough will get stuck. While starting
the virtual machine, it calls
pin_user_pages_remote(..., FOLL_LONGTERM, ...) to pin memory. Normally,
if a page is present and in the CMA area, pin_user_pages_remote() will
migrate the page from the CMA area to a non-CMA area because of the
FOLL_LONGTERM flag. But if a non-movable allocation request returns CMA
memory, migrate_longterm_unpinnable_pages() will migrate a CMA page to
another CMA page, which fails the check in
check_and_migrate_movable_pages() and causes the migration to loop
endlessly.
Call trace:
pin_user_pages_remote
--__gup_longterm_locked // endless loops in this function
----_get_user_pages_locked
----check_and_migrate_movable_pages
------migrate_longterm_unpinnable_pages
--------alloc_migration_target
This problem also has a negative impact on CMA itself. For example,
when CMA memory is borrowed by THP and we need to reclaim it through
cma_alloc() or dma_alloc_coherent(), we must move those pages out to
ensure CMA's users can retrieve that contiguous memory. If CMA's memory
is occupied by non-movable pages, we can't relocate them, and as a
result cma_alloc() is more likely to fail.
To fix the problem above, add one more PCP list for THP, which does not
introduce a new cacheline for struct per_cpu_pages. THP then has 2 PCP
lists: one used by MOVABLE allocations (migratetype MIGRATE_MOVABLE) and
the other used by UNMOVABLE and RECLAIMABLE allocations (migratetypes
MIGRATE_UNMOVABLE and MIGRATE_RECLAIMABLE).
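A hedged sketch of the resulting list-index mapping (the constants below
mirror typical x86-64/4K values and are illustrative, not authoritative):

	#define MIGRATE_PCPTYPES	3	/* UNMOVABLE, MOVABLE, RECLAIMABLE */
	#define PAGE_ALLOC_COSTLY_ORDER	3
	#define NR_LOWORDER_PCP_LISTS	(MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
	#define HPAGE_PMD_ORDER		9
	#define MIGRATE_MOVABLE		1

	/*
	 * Low orders keep one list per (order, migratetype) pair; PMD-sized
	 * THP now gets two lists, with MOVABLE placed right after the shared
	 * UNMOVABLE/RECLAIMABLE list.
	 */
	static unsigned int order_to_pindex_sketch(int migratetype, int order)
	{
		if (order > PAGE_ALLOC_COSTLY_ORDER)
			return NR_LOWORDER_PCP_LISTS +
			       (migratetype == MIGRATE_MOVABLE);

		return (MIGRATE_PCPTYPES * order) + migratetype;
	}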
Fixes: 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations")
Signed-off-by: yangge <yangge1116(a)126.com>
---
include/linux/mmzone.h | 9 ++++-----
mm/page_alloc.c | 9 +++++++--
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b7546dd..cb7f265 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -656,13 +656,12 @@ enum zone_watermarks {
};
/*
- * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list
- * for THP which will usually be GFP_MOVABLE. Even if it is another type,
- * it should not contribute to serious fragmentation causing THP allocation
- * failures.
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists
+ * are added for THP. One PCP list is used by MOVABLE allocations, and the
+ * other PCP list is used by UNMOVABLE and RECLAIMABLE allocations.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define NR_PCP_THP 1
+#define NR_PCP_THP 2
#else
#define NR_PCP_THP 0
#endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8f416a0..0a837e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -504,10 +504,15 @@ static void bad_page(struct page *page, const char *reason)
static inline unsigned int order_to_pindex(int migratetype, int order)
{
+ bool __maybe_unused movable;
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (order > PAGE_ALLOC_COSTLY_ORDER) {
VM_BUG_ON(order != HPAGE_PMD_ORDER);
- return NR_LOWORDER_PCP_LISTS;
+
+ movable = migratetype == MIGRATE_MOVABLE;
+
+ return NR_LOWORDER_PCP_LISTS + movable;
}
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
@@ -521,7 +526,7 @@ static inline int pindex_to_order(unsigned int pindex)
int order = pindex / MIGRATE_PCPTYPES;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pindex == NR_LOWORDER_PCP_LISTS)
+ if (pindex >= NR_LOWORDER_PCP_LISTS)
order = HPAGE_PMD_ORDER;
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
--
2.7.4
The caching mode for buffer objects with VRAM as a possible
placement was forced to write-combined, regardless of placement.
However, write-combined system memory is expensive to allocate and
even though it is pooled, the pool is expensive to shrink, since
it involves global CPU TLB flushes.
Moreover, write-combined system memory from TTM is only reliably
available on x86, and DGFX is not restricted to x86.
So regardless of the cpu caching mode selected for a bo,
internally use write-back caching mode for system memory on DGFX.
Coherency is maintained, but user-space clients may perceive a
difference in cpu access speeds.
v2:
- Update RB- and Ack tags.
- Rephrase wording in xe_drm.h (Matt Roper)
Signed-off-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra <pallavi.mishra(a)intel.com>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Effie Yu <effie.yu(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Jose Souza <jose.souza(a)intel.com>
Cc: Michal Mrozek <michal.mrozek(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
Acked-by: Matthew Auld <matthew.auld(a)intel.com>
Acked-by: José Roberto de Souza <jose.souza(a)intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Acked-by: Michal Mrozek <michal.mrozek(a)intel.com>
Acked-by: Effie Yu <effie.yu(a)intel.com> #On chat
---
drivers/gpu/drm/xe/xe_bo.c | 47 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_bo_types.h | 3 +-
include/uapi/drm/xe_drm.h | 8 +++++-
3 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65c696966e96..31192d983d9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
- enum ttm_caching caching;
+ enum ttm_caching caching = ttm_cached;
int err;
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
PAGE_SIZE);
- switch (bo->cpu_caching) {
- case DRM_XE_GEM_CPU_CACHING_WC:
- caching = ttm_write_combined;
- break;
- default:
- caching = ttm_cached;
- break;
- }
-
- WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
-
/*
- * Display scanout is always non-coherent with the CPU cache.
- *
- * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
- * require a CPU:WC mapping.
+ * DGFX system memory is always WB / ttm_cached, since
+ * other caching modes are only supported on x86. DGFX
+ * GPU system memory accesses are always coherent with the
+ * CPU.
*/
- if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
- (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
- caching = ttm_write_combined;
+ if (!IS_DGFX(xe)) {
+ switch (bo->cpu_caching) {
+ case DRM_XE_GEM_CPU_CACHING_WC:
+ caching = ttm_write_combined;
+ break;
+ default:
+ caching = ttm_cached;
+ break;
+ }
+
+ WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+ /*
+ * Display scanout is always non-coherent with the CPU cache.
+ *
+ * For Xe_LPG and beyond, PPGTT PTE lookups are also
+ * non-coherent and require a CPU:WC mapping.
+ */
+ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+ (xe->info.graphics_verx100 >= 1270 &&
+ bo->flags & XE_BO_FLAG_PAGETABLE))
+ caching = ttm_write_combined;
+ }
if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 02d68873558a..ebc8abf7930a 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -68,7 +68,8 @@ struct xe_bo {
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
- * objects.
+ * objects. Exceptions are system memory on DGFX, which is always
+ * WB.
*/
u16 cpu_caching;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 33544ef78d3e..83474125f3db 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -783,7 +783,13 @@ struct drm_xe_gem_create {
#define DRM_XE_GEM_CPU_CACHING_WC 2
/**
* @cpu_caching: The CPU caching mode to select for this object. If
- * mmaping the object the mode selected here will also be used.
+ * mmaping the object the mode selected here will also be used. The
+ * exception is when mapping system memory (including evicted
+ * system memory) on discrete GPUs. The caching mode selected will
+ * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency
+ * between GPU- and CPU is guaranteed. The caching mode of
+ * existing CPU-mappings will be updated transparently to
+ * user-space clients.
*/
__u16 cpu_caching;
/** @pad: MBZ */
--
2.44.0