Greetings,
I wonder why you continue neglecting my emails. Please, acknowledge
the receipt of this message in reference to the subject above as I
intend to send to you the details of the mail. Sometimes, try to check
your spam box because most of these correspondences fall out sometimes
in SPAM folder.
Best regards,
Le 06/02/2023 à 14:45, Sasha Levin a écrit :
> This is a note to let you know that I've just added the patch titled
>
> powerpc/bpf: Move common helpers into bpf_jit.h
>
> to the 5.10-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> powerpc-bpf-move-common-helpers-into-bpf_jit.h.patch
> and it can be found in the queue-5.10 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
This commit was part of a series that removed classical BPF and
implemented EBPF on powerpc32.
At that time PPC64 already had EBPF.
This commit is to make some parts of the code common for PPC32 and PPC64
because at the end of the series both are EBPF.
But I doubt this commit alone can be applied without first removing
classical BPF (commit 6944caad78fc ("powerpc/bpf: Remove classical BPF
support for PPC32"))
Christophe
>
>
> commit 76f74e69efb95e4add82e5b687e062f2c989739f
> Author: Christophe Leroy <christophe.leroy(a)csgroup.eu>
> Date: Mon Mar 22 16:37:48 2021 +0000
>
> powerpc/bpf: Move common helpers into bpf_jit.h
>
> [ Upstream commit f1b1583d5faa86cb3dcb7b740594868debad7c30 ]
>
> Move functions bpf_flush_icache(), bpf_is_seen_register() and
> bpf_set_seen_register() in order to reuse them in future
> bpf_jit_comp32.c
>
> Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
> Link: https://lore.kernel.org/r/28e8d5a75e64807d7e9d39a4b52658755e259f8c.16164309…
> Stable-dep-of: 71f656a50176 ("bpf: Fix to preserve reg parent/live fields when copying range info")
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 1a5b4da8a235..cd9aab6ec2c5 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -117,6 +117,41 @@
> #define COND_LT (CR0_LT | COND_CMP_TRUE)
> #define COND_LE (CR0_GT | COND_CMP_FALSE)
>
> +#define SEEN_FUNC 0x1000 /* might call external helpers */
> +#define SEEN_STACK 0x2000 /* uses BPF stack */
> +#define SEEN_TAILCALL 0x4000 /* uses tail calls */
> +
> +struct codegen_context {
> + /*
> + * This is used to track register usage as well
> + * as calls to external helpers.
> + * - register usage is tracked with corresponding
> + * bits (r3-r10 and r27-r31)
> + * - rest of the bits can be used to track other
> + * things -- for now, we use bits 16 to 23
> + * encoded in SEEN_* macros above
> + */
> + unsigned int seen;
> + unsigned int idx;
> + unsigned int stack_size;
> +};
> +
> +static inline void bpf_flush_icache(void *start, void *end)
> +{
> + smp_wmb(); /* smp write barrier */
> + flush_icache_range((unsigned long)start, (unsigned long)end);
> +}
> +
> +static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
> +{
> + return ctx->seen & (1 << (31 - i));
> +}
> +
> +static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
> +{
> + ctx->seen |= 1 << (31 - i);
> +}
> +
> #endif
>
> #endif
> diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
> index 4d164e865b39..201b83bfa869 100644
> --- a/arch/powerpc/net/bpf_jit64.h
> +++ b/arch/powerpc/net/bpf_jit64.h
> @@ -86,25 +86,6 @@ static const int b2p[] = {
> } while(0)
> #define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0)
>
> -#define SEEN_FUNC 0x1000 /* might call external helpers */
> -#define SEEN_STACK 0x2000 /* uses BPF stack */
> -#define SEEN_TAILCALL 0x4000 /* uses tail calls */
> -
> -struct codegen_context {
> - /*
> - * This is used to track register usage as well
> - * as calls to external helpers.
> - * - register usage is tracked with corresponding
> - * bits (r3-r10 and r27-r31)
> - * - rest of the bits can be used to track other
> - * things -- for now, we use bits 16 to 23
> - * encoded in SEEN_* macros above
> - */
> - unsigned int seen;
> - unsigned int idx;
> - unsigned int stack_size;
> -};
> -
> #endif /* !__ASSEMBLY__ */
>
> #endif
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 7da59ddc90dd..ebad2c79cd6f 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -24,22 +24,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
> memset32(area, BREAKPOINT_INSTRUCTION, size/4);
> }
>
> -static inline void bpf_flush_icache(void *start, void *end)
> -{
> - smp_wmb();
> - flush_icache_range((unsigned long)start, (unsigned long)end);
> -}
> -
> -static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
> -{
> - return ctx->seen & (1 << (31 - i));
> -}
> -
> -static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
> -{
> - ctx->seen |= 1 << (31 - i);
> -}
> -
> static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
> {
> /*
When we upgraded our kernel, we started seeing some page corruption like
the following consistently:
BUG: Bad page state in process ganesha.nfsd pfn:1304ca
page:0000000022261c55 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x1304ca
flags: 0x17ffffc0000000()
raw: 0017ffffc0000000 ffff8a513ffd4c98 ffffeee24b35ec08 0000000000000000
raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000
page dumped because: nonzero mapcount
CPU: 0 PID: 15567 Comm: ganesha.nfsd Kdump: loaded Tainted: P B O 5.10.158-1.nutanix.20221209.el7.x86_64 #1
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016
Call Trace:
dump_stack+0x74/0x96
bad_page.cold+0x63/0x94
check_new_page_bad+0x6d/0x80
rmqueue+0x46e/0x970
get_page_from_freelist+0xcb/0x3f0
? _cond_resched+0x19/0x40
__alloc_pages_nodemask+0x164/0x300
alloc_pages_current+0x87/0xf0
skb_page_frag_refill+0x84/0x110
...
Sometimes, it would also show up as corruption in the free list pointer and
cause crashes.
After bisecting the issue, we found the issue started from e320d3012d25:
if (put_page_testzero(page))
free_the_page(page, order);
else if (!PageHead(page))
while (order-- > 0)
free_the_page(page + (1 << order), order);
So the problem is the check PageHead is racy because at this point we
already dropped our reference to the page. So even if we came in with
compound page, the page can already be freed and PageHead can return
false and we will end up freeing all the tail pages causing double free.
Fixes: e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages")
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: linux-mm(a)kvack.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Chunwei Chen <david.chen(a)nutanix.com>
---
mm/page_alloc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0745aedebb37..3bb3484563ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5631,9 +5631,12 @@ EXPORT_SYMBOL(get_zeroed_page);
*/
void __free_pages(struct page *page, unsigned int order)
{
+ /* get PageHead before we drop reference */
+ int head = PageHead(page);
+
if (put_page_testzero(page))
free_the_page(page, order);
- else if (!PageHead(page))
+ else if (!head)
while (order-- > 0)
free_the_page(page + (1 << order), order);
}
--
2.22.3
Hi Greg. Would be great if you could pick up 690eb7dec72a ("HID:
logitech: Disable hi-res scrolling on USB") for the next 6.1.y release,
as it's fixing a regression I saw multiple people report.
The commit (see below) that was recently merged to mainline and has a
proper stable "Cc: <stable@...>" tag, so I guess you scripts will at
some point pick it up automatically. But I noticed you updated the
stable queue and hour ago and this patch afaics is not in it yet
(despite some other patches being in it that were merged later), so I
thought: just to be sure send a quick heads up.
Ciao, Thorsten
On 09.02.23 19:10, Linux Kernel Mailing List wrote:
> Commit: 690eb7dec72ae52d1d710d14a451844b4d0f4f19
> Parent: ea427a222d8bdf2bc1a8a6da3ebe247f7dced70c
> Refname: refs/heads/master
> Web: https://git.kernel.org/torvalds/c/690eb7dec72ae52d1d710d14a451844b4d0f4f19
> Author: Bastien Nocera <hadess(a)hadess.net>
> AuthorDate: Fri Feb 3 11:18:00 2023 +0100
> Committer: Benjamin Tissoires <benjamin.tissoires(a)redhat.com>
> CommitDate: Mon Feb 6 10:58:15 2023 +0100
>
> HID: logitech: Disable hi-res scrolling on USB
>
> On some Logitech mice, such as the G903, and possibly the G403, the HID
> events are generated on a different interface to the HID++ one.
>
> If we enable hi-res through the HID++ interface, the HID interface
> wouldn't know anything about it, and handle the events as if they were
> regular scroll events, making the mouse unusable.
>
> Disable hi-res scrolling on those devices until we implement scroll
> events through HID++.
>
> Signed-off-by: Bastien Nocera <hadess(a)hadess.net>
> Tested-by: Tobias Klausmann <klausman(a)schwarzvogel.de>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=216885
> Fixes: 908d325e1665 ("HID: logitech-hidpp: Detect hi-res scrolling support")
> Cc: stable(a)vger.kernel.org
> Link: https://lore.kernel.org/r/20230203101800.139380-1-hadess@hadess.net
> Signed-off-by: Benjamin Tissoires <benjamin.tissoires(a)redhat.com>
> ---
> drivers/hid/hid-logitech-hidpp.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
> index abf2c95e4d0b0..9c1ee8e91e0ca 100644
> --- a/drivers/hid/hid-logitech-hidpp.c
> +++ b/drivers/hid/hid-logitech-hidpp.c
> @@ -3978,7 +3978,8 @@ static void hidpp_connect_event(struct hidpp_device *hidpp)
> }
>
> hidpp_initialize_battery(hidpp);
> - hidpp_initialize_hires_scroll(hidpp);
> + if (!hid_is_usb(hidpp->hid_dev))
> + hidpp_initialize_hires_scroll(hidpp);
>
> /* forward current battery state */
> if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP10_BATTERY) {
>
There is no separate blank line between target_remove_from_tmr_list() and
transport_cmd_check_stop_to_fabric
As per coding-style, it is require to separate functions with one blank line.
Fixes: 12b6fcd0ea7f ("scsi: target: core: Remove from tmr_list during LUN unlink")
Signed-off-by: Alok Tiwari <alok.a.tiwari(a)oracle.com>
---
drivers/target/target_core_transport.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 5926316252eb..f1cdf78fc5ef 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -691,6 +691,7 @@ static void target_remove_from_tmr_list(struct se_cmd *cmd)
spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
}
}
+
/*
* This function is called by the target core after the target core has
* finished processing a SCSI command or SCSI TMF. Both the regular command
--
2.39.1
When a connexion was established without going through
NL80211_CMD_CONNECT, the ssid was never set in the wireless_dev struct.
Now we set it during when an NL80211_CMD_AUTHENTICATE is issued.
It may be needed to test this on some additional hardware (tested with
iwlwifi and a AX201, and iwd on the userspace side), I could not test
things like roaming and p2p.
This applies to v6.2-rc7,
but I think it may also be interesting to backport it from 5.19 to 6.1
Reported-by: Yohan Prod'homme <kernel(a)zoddo.fr>
Fixes: 7b0a0e3c3a88260b6fcb017e49f198463aa62ed1
Cc: linux-wireless(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216711
Signed-off-by: Marc Bornand <dev.mbornand(a)systemb.ch>
---
net/wireless/nl80211.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 33a82ecab9d5..f1627ea542b9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10552,6 +10552,10 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
return -ENOENT;
wdev_lock(dev->ieee80211_ptr);
+
+ memcpy(dev->ieee80211_ptr->u.client.ssid, ssid, ssid_len);
+ dev->ieee80211_ptr->u.client.ssid_len = ssid_len;
+
err = cfg80211_mlme_auth(rdev, dev, &req);
wdev_unlock(dev->ieee80211_ptr);
@@ -11025,6 +11029,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
wdev_lock(dev->ieee80211_ptr);
+
+ if (reason_code == WLAN_REASON_DEAUTH_LEAVING) {
+ dev->ieee80211_ptr->u.client.ssid_len = 0;
+ }
+
err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
local_state_change);
wdev_unlock(dev->ieee80211_ptr);
--
2.39.1
I did a bisect which implicated
f2bd1c5ae2cb0cf9525c9bffc0038c12dd7e1338 as the first bad commit.
Reverting this commit on 6.1.8 gives me working sound again.
I'm not clear why this is breaking 6.1.x since it appears to be in
6.0.18 (7494e2e6c55e), which was the last working package in Fedora
for the 6.0 line. Maybe something else didn't make it into 6.1?
When preparing an AER-CTR request, the driver copies the key provided by
the user into a data structure that is accessible by the firmware.
If the target device is QAT GEN4, the key size is rounded up by 16 since
a rounded up size is expected by the device.
If the key size is rounded up before the copy, the size used for copying
the key might be bigger than the size of the region containing the key,
causing an out-of-bounds read.
Fix by doing the copy first and then update the keylen.
This is to fix the following warning reported by KASAN:
[ 138.150574] BUG: KASAN: global-out-of-bounds in qat_alg_skcipher_init_com.isra.0+0x197/0x250 [intel_qat]
[ 138.150641] Read of size 32 at addr ffffffff88c402c0 by task cryptomgr_test/2340
[ 138.150651] CPU: 15 PID: 2340 Comm: cryptomgr_test Not tainted 6.2.0-rc1+ #45
[ 138.150659] Hardware name: Intel Corporation ArcherCity/ArcherCity, BIOS EGSDCRB1.86B.0087.D13.2208261706 08/26/2022
[ 138.150663] Call Trace:
[ 138.150668] <TASK>
[ 138.150922] kasan_check_range+0x13a/0x1c0
[ 138.150931] memcpy+0x1f/0x60
[ 138.150940] qat_alg_skcipher_init_com.isra.0+0x197/0x250 [intel_qat]
[ 138.151006] qat_alg_skcipher_init_sessions+0xc1/0x240 [intel_qat]
[ 138.151073] crypto_skcipher_setkey+0x82/0x160
[ 138.151085] ? prepare_keybuf+0xa2/0xd0
[ 138.151095] test_skcipher_vec_cfg+0x2b8/0x800
Fixes: 67916c951689 ("crypto: qat - add AES-CTR support for QAT GEN4 devices")
Cc: <stable(a)vger.kernel.org>
Reported-by: Vladis Dronov <vdronov(a)redhat.com>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu(a)intel.com>
Reviewed-by: Fiona Trahe <fiona.trahe(a)intel.com>
---
drivers/crypto/qat/qat_common/qat_algs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index b4b9f0aa59b9..b61ada559158 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -435,8 +435,8 @@ static void qat_alg_skcipher_init_com(struct qat_alg_skcipher_ctx *ctx,
} else if (aes_v2_capable && mode == ICP_QAT_HW_CIPHER_CTR_MODE) {
ICP_QAT_FW_LA_SLICE_TYPE_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE);
- keylen = round_up(keylen, 16);
memcpy(cd->ucs_aes.key, key, keylen);
+ keylen = round_up(keylen, 16);
} else {
memcpy(cd->aes.key, key, keylen);
}
--
2.39.1
Commit 550b33cfd445 ("arm64: efi: Force the use of SetVirtualAddressMap()
on Altra machines") identifies the Altra family via the family field in
the type#1 SMBIOS record. eMAG and Altra Max machines are similarly
affected but not detected with the strict strcmp test.
The type1_family smbios string is not an entirely reliable means of
identifying systems with this issue as OEMs can, and do, use their own
strings for these fields. However, until we have a better solution,
capture the bulk of these systems by adding strcmp matching for "eMAG"
and "Altra Max".
Fixes: 550b33cfd445 ("arm64: efi: Force the use of SetVirtualAddressMap() on Altra machines")
Cc: <stable(a)vger.kernel.org> # 6.1.x
Cc: <linux-efi(a)vger.kernel.org>
Cc: Alexandru Elisei <alexandru.elisei(a)gmail.com>
Cc: Justin He <Justin.He(a)arm.com>
Cc: Huacai Chen <chenhuacai(a)kernel.org>
Cc: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Signed-off-by: Darren Hart <darren(a)os.amperecomputing.com>
---
V1 -> V2: include eMAG
drivers/firmware/efi/libstub/arm64.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/firmware/efi/libstub/arm64.c b/drivers/firmware/efi/libstub/arm64.c
index ff2d18c42ee7..4501652e11ab 100644
--- a/drivers/firmware/efi/libstub/arm64.c
+++ b/drivers/firmware/efi/libstub/arm64.c
@@ -19,10 +19,13 @@ static bool system_needs_vamap(void)
const u8 *type1_family = efi_get_smbios_string(1, family);
/*
- * Ampere Altra machines crash in SetTime() if SetVirtualAddressMap()
- * has not been called prior.
+ * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if
+ * SetVirtualAddressMap() has not been called prior.
*/
- if (!type1_family || strcmp(type1_family, "Altra"))
+ if (!type1_family || (
+ strcmp(type1_family, "eMAG") &&
+ strcmp(type1_family, "Altra") &&
+ strcmp(type1_family, "Altra Max")))
return false;
efi_warn("Working around broken SetVirtualAddressMap()\n");
--
2.34.3
The quilt patch titled
Subject: memory tier: release the new_memtier in find_create_memory_tier()
has been removed from the -mm tree. Its filename was
memory-tier-release-the-new_memtier-in-find_create_memory_tier.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Tong Tiangen <tongtiangen(a)huawei.com>
Subject: memory tier: release the new_memtier in find_create_memory_tier()
Date: Sun, 29 Jan 2023 04:06:51 +0000
In find_create_memory_tier(), if failed to register device, then we should
release new_memtier from the tier list and put device instead of memtier.
Link: https://lkml.kernel.org/r/20230129040651.1329208-1-tongtiangen@huawei.com
Fixes: 9832fb87834e ("mm/demotion: expose memory tier details via sysfs")
Signed-off-by: Tong Tiangen <tongtiangen(a)huawei.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Hanjun Guo <guohanjun(a)huawei.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Guohanjun <guohanjun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-tiers.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/memory-tiers.c~memory-tier-release-the-new_memtier-in-find_create_memory_tier
+++ a/mm/memory-tiers.c
@@ -211,8 +211,8 @@ static struct memory_tier *find_create_m
ret = device_register(&new_memtier->dev);
if (ret) {
- list_del(&memtier->list);
- put_device(&memtier->dev);
+ list_del(&new_memtier->list);
+ put_device(&new_memtier->dev);
return ERR_PTR(ret);
}
memtier = new_memtier;
_
Patches currently in -mm which might be from tongtiangen(a)huawei.com are
The quilt patch titled
Subject: of: reserved_mem: Have kmemleak ignore dynamically allocated reserved mem
has been removed from the -mm tree. Its filename was
of-reserved_mem-have-kmemleak-ignore-dynamically-allocated-reserved-mem.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Subject: of: reserved_mem: Have kmemleak ignore dynamically allocated reserved mem
Date: Wed, 8 Feb 2023 15:20:00 -0800
Patch series "Fix kmemleak crashes when scanning CMA regions", v2.
When trying to boot a device with an ARM64 kernel with the following
config options enabled:
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y
CONFIG_DEBUG_KMEMLEAK=y
a crash is encountered when kmemleak starts to scan the list of gray
or allocated objects that it maintains. Upon closer inspection, it was
observed that these page-faults always occurred when kmemleak attempted
to scan a CMA region.
At the moment, kmemleak is made aware of CMA regions that are specified
through the devicetree to be dynamically allocated within a range of
addresses. However, kmemleak should not need to scan CMA regions or any
reserved memory region, as those regions can be used for DMA transfers
between drivers and peripherals, and thus wouldn't contain anything
useful for kmemleak.
Additionally, since CMA regions are unmapped from the kernel's address
space when they are freed to the buddy allocator at boot when
CONFIG_DEBUG_PAGEALLOC is enabled, kmemleak shouldn't attempt to access
those memory regions, as that will trigger a crash. Thus, kmemleak
should ignore all dynamically allocated reserved memory regions.
This patch (of 1):
Currently, kmemleak ignores dynamically allocated reserved memory regions
that don't have a kernel mapping. However, regions that do retain a
kernel mapping (e.g. CMA regions) do get scanned by kmemleak.
This is not ideal for two reasons:
1 kmemleak works by scanning memory regions for pointers to allocated
objects to determine if those objects have been leaked or not.
However, reserved memory regions can be used between drivers and
peripherals for DMA transfers, and thus, would not contain pointers to
allocated objects, making it unnecessary for kmemleak to scan these
reserved memory regions.
2 When CONFIG_DEBUG_PAGEALLOC is enabled, along with kmemleak, the
CMA reserved memory regions are unmapped from the kernel's address
space when they are freed to buddy at boot. These CMA reserved regions
are still tracked by kmemleak, however, and when kmemleak attempts to
scan them, a crash will happen, as accessing the CMA region will result
in a page-fault, since the regions are unmapped.
Thus, use kmemleak_ignore_phys() for all dynamically allocated reserved
memory regions, instead of those that do not have a kernel mapping
associated with them.
Link: https://lkml.kernel.org/r/20230208232001.2052777-1-isaacmanjarres@google.com
Link: https://lkml.kernel.org/r/20230208232001.2052777-2-isaacmanjarres@google.com
Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Acked-by: Mike Rapoport (IBM) <rppt(a)kernel.org>
Acked-by: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Frank Rowand <frowand.list(a)gmail.com>
Cc: Kirill A. Shutemov <kirill.shtuemov(a)linux.intel.com>
Cc: Nick Kossifidis <mick(a)ics.forth.gr>
Cc: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Cc: Rob Herring <robh(a)kernel.org>
Cc: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Cc: Saravana Kannan <saravanak(a)google.com>
Cc: <stable(a)vger.kernel.org> [5.15+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/of/of_reserved_mem.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/drivers/of/of_reserved_mem.c~of-reserved_mem-have-kmemleak-ignore-dynamically-allocated-reserved-mem
+++ a/drivers/of/of_reserved_mem.c
@@ -48,9 +48,10 @@ static int __init early_init_dt_alloc_re
err = memblock_mark_nomap(base, size);
if (err)
memblock_phys_free(base, size);
- kmemleak_ignore_phys(base);
}
+ kmemleak_ignore_phys(base);
+
return err;
}
_
Patches currently in -mm which might be from isaacmanjarres(a)google.com are
The quilt patch titled
Subject: scripts/gdb: fix 'lx-current' for x86
has been removed from the -mm tree. Its filename was
scripts-gdb-fix-lx-current-for-x86.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jeff Xie <xiehuan09(a)gmail.com>
Subject: scripts/gdb: fix 'lx-current' for x86
Date: Sat, 4 Feb 2023 17:01:39 +0800
When printing the name of the current process, it will report an error:
(gdb) p $lx_current().comm Python Exception <class 'gdb.error'> No symbol
"current_task" in current context.: Error occurred in Python: No symbol
"current_task" in current context.
Because e57ef2ed97c1 ("x86: Put hot per CPU variables into a struct")
changed it.
Link: https://lkml.kernel.org/r/20230204090139.1789264-1-xiehuan09@gmail.com
Fixes: e57ef2ed97c1 ("x86: Put hot per CPU variables into a struct")
Signed-off-by: Jeff Xie <xiehuan09(a)gmail.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/cpus.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/scripts/gdb/linux/cpus.py~scripts-gdb-fix-lx-current-for-x86
+++ a/scripts/gdb/linux/cpus.py
@@ -163,7 +163,7 @@ def get_current_task(cpu):
task_ptr_type = task_type.get_type().pointer()
if utils.is_target_arch("x86"):
- var_ptr = gdb.parse_and_eval("¤t_task")
+ var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task")
return per_cpu(var_ptr, cpu).dereference()
elif utils.is_target_arch("aarch64"):
current_task_addr = gdb.parse_and_eval("$SP_EL0")
_
Patches currently in -mm which might be from xiehuan09(a)gmail.com are
The patch titled
Subject: mm/page_alloc.c: fix page corruption caused by racy check in __free_pages
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fix-page-corruption-caused-by-racy-check-in-__free_pages.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Chen <david.chen(a)nutanix.com>
Subject: mm/page_alloc.c: fix page corruption caused by racy check in __free_pages
Date: Thu, 9 Feb 2023 17:48:28 +0000
When we upgraded our kernel, we started seeing some page corruption like
the following consistently:
BUG: Bad page state in process ganesha.nfsd pfn:1304ca
page:0000000022261c55 refcount:0 mapcount:-128 mapping:0000000000000000 in=
dex:0x0 pfn:0x1304ca
flags: 0x17ffffc0000000()
raw: 0017ffffc0000000 ffff8a513ffd4c98 ffffeee24b35ec08 0000000000000000
raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000
page dumped because: nonzero mapcount
CPU: 0 PID: 15567 Comm: ganesha.nfsd Kdump: loaded Tainted: P B O =
5.10.158-1.nutanix.20221209.el7.x86_64 #1
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Referenc=
e Platform, BIOS 6.00 04/05/2016
Call Trace:
dump_stack+0x74/0x96
bad_page.cold+0x63/0x94
check_new_page_bad+0x6d/0x80
rmqueue+0x46e/0x970
get_page_from_freelist+0xcb/0x3f0
? _cond_resched+0x19/0x40
__alloc_pages_nodemask+0x164/0x300
alloc_pages_current+0x87/0xf0
skb_page_frag_refill+0x84/0x110
...
Sometimes, it would also show up as corruption in the free list pointer and
cause crashes.
After bisecting the issue, we found the issue started from e320d3012d25:
if (put_page_testzero(page))
free_the_page(page, order);
else if (!PageHead(page))
while (order-- > 0)
free_the_page(page + (1 << order), order);
So the problem is the check PageHead is racy because at this point we
already dropped our reference to the page. So even if we came in with
compound page, the page can already be freed and PageHead can return false
and we will end up freeing all the tail pages causing double free.
Link: https://lkml.kernel.org/r/Message-ID:
Fixes: e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages")
Signed-off-by: Chunwei Chen <david.chen(a)nutanix.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/mm/page_alloc.c~fix-page-corruption-caused-by-racy-check-in-__free_pages
+++ a/mm/page_alloc.c
@@ -5631,9 +5631,12 @@ EXPORT_SYMBOL(get_zeroed_page);
*/
void __free_pages(struct page *page, unsigned int order)
{
+ /* get PageHead before we drop reference */
+ int head =3D PageHead(page);
+
if (put_page_testzero(page))
free_the_page(page, order);
- else if (!PageHead(page))
+ else if (!head)
while (order-- > 0)
free_the_page(page + (1 << order), order);
}
_
Patches currently in -mm which might be from david.chen(a)nutanix.com are
fix-page-corruption-caused-by-racy-check-in-__free_pages.patch
--
Hello Dear
Am a dying woman here in the hospital, i was diagnose as a
Coronavirus patient over 2 months ago. I am A business woman who is
dealing with Gold Exportation, I Am 59 year old from USA California i
have a charitable and unfufilling project that am about to handover
to you, if you are interested to know more about this project please reply me.
Hope to hear from you
Best Regard
Margaret Christopher
Atm, drm_dp_remove_payload() uses the same payload state to both get the
vc_start_slot required for the payload removal DPCD message and to
deduct time_slots from vc_start_slot of all payloads after the one being
removed.
The above isn't always correct, as vc_start_slot must be the up-to-date
version contained in the new payload state, but time_slots must be the
one used when the payload was previously added, contained in the old
payload state. The new payload's time_slots can change vs. the old one
if the current atomic commit changes the corresponding mode.
This patch let's drivers pass the old and new payload states to
drm_dp_remove_payload(), but keeps these the same for now in all drivers
not to change the behavior. A follow-up i915 patch will pass in that
driver the correct old and new states to the function.
Cc: Lyude Paul <lyude(a)redhat.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Ben Skeggs <bskeggs(a)redhat.com>
Cc: Karol Herbst <kherbst(a)redhat.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Wayne Lin <Wayne.Lin(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.1
Cc: dri-devel(a)lists.freedesktop.org
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
.../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +-
drivers/gpu/drm/display/drm_dp_mst_topology.c | 26 ++++++++++---------
drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++-
drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +-
include/drm/display/drm_dp_mst_helper.h | 3 ++-
5 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index a50319fc42b11..180d3893b68da 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -208,7 +208,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
if (enable)
drm_dp_add_payload_part1(mst_mgr, mst_state, payload);
else
- drm_dp_remove_payload(mst_mgr, mst_state, payload);
+ drm_dp_remove_payload(mst_mgr, mst_state, payload, payload);
/* mst_mgr->->payloads are VC payload notify MST branch using DPCD or
* AUX message. The sequence is slot 1-63 allocated sequence for each
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 847c10aa2098c..1990ff5dc7ddd 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -3342,7 +3342,8 @@ EXPORT_SYMBOL(drm_dp_add_payload_part1);
* drm_dp_remove_payload() - Remove an MST payload
* @mgr: Manager to use.
* @mst_state: The MST atomic state
- * @payload: The payload to write
+ * @old_payload: The payload with its old state
+ * @new_payload: The payload to write
*
* Removes a payload from an MST topology if it was successfully assigned a start slot. Also updates
* the starting time slots of all other payloads which would have been shifted towards the start of
@@ -3350,36 +3351,37 @@ EXPORT_SYMBOL(drm_dp_add_payload_part1);
*/
void drm_dp_remove_payload(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_topology_state *mst_state,
- struct drm_dp_mst_atomic_payload *payload)
+ const struct drm_dp_mst_atomic_payload *old_payload,
+ struct drm_dp_mst_atomic_payload *new_payload)
{
struct drm_dp_mst_atomic_payload *pos;
bool send_remove = false;
/* We failed to make the payload, so nothing to do */
- if (payload->vc_start_slot == -1)
+ if (new_payload->vc_start_slot == -1)
return;
mutex_lock(&mgr->lock);
- send_remove = drm_dp_mst_port_downstream_of_branch(payload->port, mgr->mst_primary);
+ send_remove = drm_dp_mst_port_downstream_of_branch(new_payload->port, mgr->mst_primary);
mutex_unlock(&mgr->lock);
if (send_remove)
- drm_dp_destroy_payload_step1(mgr, mst_state, payload);
+ drm_dp_destroy_payload_step1(mgr, mst_state, new_payload);
else
drm_dbg_kms(mgr->dev, "Payload for VCPI %d not in topology, not sending remove\n",
- payload->vcpi);
+ new_payload->vcpi);
list_for_each_entry(pos, &mst_state->payloads, next) {
- if (pos != payload && pos->vc_start_slot > payload->vc_start_slot)
- pos->vc_start_slot -= payload->time_slots;
+ if (pos != new_payload && pos->vc_start_slot > new_payload->vc_start_slot)
+ pos->vc_start_slot -= old_payload->time_slots;
}
- payload->vc_start_slot = -1;
+ new_payload->vc_start_slot = -1;
mgr->payload_count--;
- mgr->next_start_slot -= payload->time_slots;
+ mgr->next_start_slot -= old_payload->time_slots;
- if (payload->delete)
- drm_dp_mst_put_port_malloc(payload->port);
+ if (new_payload->delete)
+ drm_dp_mst_put_port_malloc(new_payload->port);
}
EXPORT_SYMBOL(drm_dp_remove_payload);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index f3cb12dcfe0a7..dc4e5ff1dbb31 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -526,6 +526,8 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
to_intel_connector(old_conn_state->connector);
struct drm_dp_mst_topology_state *mst_state =
drm_atomic_get_mst_topology_state(&state->base, &intel_dp->mst_mgr);
+ struct drm_dp_mst_atomic_payload *payload =
+ drm_atomic_get_mst_payload_state(mst_state, connector->port);
struct drm_i915_private *i915 = to_i915(connector->base.dev);
drm_dbg_kms(&i915->drm, "active links %d\n",
@@ -534,7 +536,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
intel_hdcp_disable(intel_mst->connector);
drm_dp_remove_payload(&intel_dp->mst_mgr, mst_state,
- drm_atomic_get_mst_payload_state(mst_state, connector->port));
+ payload, payload);
intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index edcb2529b4025..ed9d374147b8d 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -885,7 +885,7 @@ nv50_msto_prepare(struct drm_atomic_state *state,
// TODO: Figure out if we want to do a better job of handling VCPI allocation failures here?
if (msto->disabled) {
- drm_dp_remove_payload(mgr, mst_state, payload);
+ drm_dp_remove_payload(mgr, mst_state, payload, payload);
nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
} else {
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index 41fd8352ab656..f5eb9aa152b14 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -841,7 +841,8 @@ int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_atomic_payload *payload);
void drm_dp_remove_payload(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_topology_state *mst_state,
- struct drm_dp_mst_atomic_payload *payload);
+ const struct drm_dp_mst_atomic_payload *old_payload,
+ struct drm_dp_mst_atomic_payload *new_payload);
int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr);
--
2.37.1
The patch titled
Subject: mm/MADV_COLLAPSE: set EAGAIN on unexpected page refcount
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-madv_collapse-set-eagain-on-unexpected-page-refcount.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Zach O'Keefe" <zokeefe(a)google.com>
Subject: mm/MADV_COLLAPSE: set EAGAIN on unexpected page refcount
Date: Tue, 24 Jan 2023 17:57:37 -0800
During collapse, in a few places we check to see if a given small page has
any unaccounted references. If the refcount on the page doesn't match our
expectations, it must be there is an unknown user concurrently interested
in the page, and so it's not safe to move the contents elsewhere.
However, the unaccounted pins are likely an ephemeral state.
In such a situation, make MADV_COLLAPSE set EAGAIN errno, indicating that
collapse may succeed on retry.
Link: https://lkml.kernel.org/r/20230125015738.912924-1-zokeefe@google.com
Fixes: 7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse")
Signed-off-by: Zach O'Keefe <zokeefe(a)google.com>
Reported-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/khugepaged.c~mm-madv_collapse-set-eagain-on-unexpected-page-refcount
+++ a/mm/khugepaged.c
@@ -2611,6 +2611,7 @@ static int madvise_collapse_errno(enum s
case SCAN_CGROUP_CHARGE_FAIL:
return -EBUSY;
/* Resource temporary unavailable - trying again might succeed */
+ case SCAN_PAGE_COUNT:
case SCAN_PAGE_LOCK:
case SCAN_PAGE_LRU:
case SCAN_DEL_PAGE_LRU:
_
Patches currently in -mm which might be from zokeefe(a)google.com are
mm-madv_collapse-set-eagain-on-unexpected-page-refcount.patch
Currently, kmemleak ignores dynamically allocated reserved memory
regions that don't have a kernel mapping. However, regions that do
retain a kernel mapping (e.g. CMA regions) do get scanned by kmemleak.
This is not ideal for two reasons:
1. kmemleak works by scanning memory regions for pointers to
allocated objects to determine if those objects have been leaked
or not. However, reserved memory regions can be used between drivers
and peripherals for DMA transfers, and thus, would not contain pointers
to allocated objects, making it unnecessary for kmemleak to scan
these reserved memory regions.
2. When CONFIG_DEBUG_PAGEALLOC is enabled, along with kmemleak, the
CMA reserved memory regions are unmapped from the kernel's address
space when they are freed to buddy at boot. These CMA reserved regions
are still tracked by kmemleak, however, and when kmemleak attempts to
scan them, a crash will happen, as accessing the CMA region will result
in a page-fault, since the regions are unmapped.
Thus, use kmemleak_ignore_phys() for all dynamically allocated reserved
memory regions, instead of those that do not have a kernel mapping
associated with them.
Cc: <stable(a)vger.kernel.org> # 5.15+
Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
---
drivers/of/of_reserved_mem.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 65f3b02a0e4e..f90975e00446 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -48,9 +48,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
err = memblock_mark_nomap(base, size);
if (err)
memblock_phys_free(base, size);
- kmemleak_ignore_phys(base);
}
+ kmemleak_ignore_phys(base);
+
return err;
}
--
2.39.1.581.gbfd45094c4-goog
Intel IOMMU driver implements IOTLB flush queue with domain selective
or PASID selective invalidations. In this case there's no need to track
IOVA page range and sync IOTLBs, which may cause significant performance
hit.
This patch adds a check to avoid IOVA gather page and IOTLB sync for
the lazy path.
The performance difference on Sapphire Rapids 100Gb NIC is improved by
the following (as measured by iperf send):
w/o this fix~48 Gbits/s. with this fix ~54 Gbits/s
Cc: <stable(a)vger.kernel.org>
Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers")
Reviewed-by: Robin Murphy <robin.murphy(a)arm.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Tested-by: Sanjay Kumar <sanjay.k.kumar(a)intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.kumar(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
---
v3: reword comments, add reviewed by tag. (Kevin)
v2: use helper function iommu_iotlb_gather_queued() instead of open
coding
---
drivers/iommu/intel/iommu.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 59df7e42fd53..2ee270e4d484 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4346,7 +4346,12 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
- iommu_iotlb_gather_add_page(domain, gather, iova, size);
+ /*
+ * We do not use page-selective IOTLB invalidation in flush queue,
+ * so there is no need to track page and sync iotlb.
+ */
+ if (!iommu_iotlb_gather_queued(gather))
+ iommu_iotlb_gather_add_page(domain, gather, iova, size);
return size;
}
--
2.25.1
Intel IOMMU driver implements IOTLB flush queue with domain selective
or PASID selective invalidations. In this case there's no need to track
IOVA page range and sync IOTLBs, which may cause significant performance
hit.
This patch adds a check to avoid IOVA gather page and IOTLB sync for
the lazy path.
The performance difference on Sapphire Rapids 100Gb NIC is improved by
the following (as measured by iperf send):
w/o this fix~48 Gbits/s. with this fix ~54 Gbits/s
Cc: <stable(a)vger.kernel.org>
Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers")
Reviewed-by: Robin Murphy <robin.murphy(a)arm.com>
Tested-by: Sanjay Kumar <sanjay.k.kumar(a)intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.kumar(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
---
v2: use helper function iommu_iotlb_gather_queued() instead of open
coding
---
drivers/iommu/intel/iommu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 161342e7149d..18265fa07828 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4348,7 +4348,13 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
- iommu_iotlb_gather_add_page(domain, gather, iova, size);
+ /*
+ * We do not use page-selective IOTLB invalidation in flush queue,
+ * There is no need to track page and sync iotlb. Domain-selective or
+ * PASID-selective validation are used in the flush queue.
+ */
+ if (!iommu_iotlb_gather_queued(gather))
+ iommu_iotlb_gather_add_page(domain, gather, iova, size);
return size;
}
--
2.25.1