As a result of the previous two patches, there are no circumstances
in which a swapped-in page is installed in a page table without first
having arch_swap_restore() called on it. Therefore, we no longer need
the logic in set_pte_at() that restores the tags, so remove it.
Because we can now rely on the page being locked, we no longer need to
handle the case where a page is having its tags restored by multiple tasks
concurrently, so we can slightly simplify the logic in mte_restore_tags().
This patch also fixes an issue where a page can have PG_mte_tagged set
with uninitialized tags. The issue is that the mte_sync_page_tags()
function sets PG_mte_tagged if it initializes page tags. Then we
return to mte_sync_tags(), which sets PG_mte_tagged again. At best,
this is redundant. However, it is possible for mte_sync_page_tags()
to return without having initialized tags for the page, i.e. in the
case where check_swap is true (non-compound page), is_swap_pte(old_pte)
is false and pte_is_tagged is false. So at worst, we set PG_mte_tagged
on a page with uninitialized tags. This can happen if, for example,
page migration causes a PTE for an untagged page to be replaced. If the
userspace program subsequently uses mprotect() to enable PROT_MTE for
that page, the uninitialized tags will be exposed to userspace.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Link: https://linux-review.googlesource.com/id/I8ad54476f3b2d0144ccd8ce0c1d7a2963…
Fixes: e059853d14ca ("arm64: mte: Fix/clarify the PG_mte_tagged semantics")
Cc: <stable(a)vger.kernel.org> # 6.1
---
The Fixes: tag (and the commit message in general) are written assuming
that this patch is landed in a maintainer tree instead of
"arm64: mte: Do not set PG_mte_tagged if tags were not initialized".
arch/arm64/include/asm/mte.h | 4 ++--
arch/arm64/include/asm/pgtable.h | 14 ++------------
arch/arm64/kernel/mte.c | 32 +++-----------------------------
arch/arm64/mm/mteswap.c | 7 +++----
4 files changed, 10 insertions(+), 47 deletions(-)
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 20dd06d70af5..dfea486a6a85 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
}
void mte_zero_clear_page_tags(void *addr);
-void mte_sync_tags(pte_t old_pte, pte_t pte);
+void mte_sync_tags(pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
static inline void mte_zero_clear_page_tags(void *addr)
{
}
-static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
+static inline void mte_sync_tags(pte_t pte)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b6ba466e2e8a..efdf48392026 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -337,18 +337,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
* don't expose tags (instruction fetches don't check tags).
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
- !pte_special(pte)) {
- pte_t old_pte = READ_ONCE(*ptep);
- /*
- * We only need to synchronise if the new PTE has tags enabled
- * or if swapping in (in which case another mapping may have
- * set tags in the past even if this PTE isn't tagged).
- * (!pte_none() && !pte_present()) is an open coded version of
- * is_swap_pte()
- */
- if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
- mte_sync_tags(old_pte, pte);
- }
+ !pte_special(pte) && pte_tagged(pte))
+ mte_sync_tags(pte);
__check_safe_pte_update(mm, ptep, pte);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f5bcb0dc6267..c40728046fed 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -35,41 +35,15 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
-static void mte_sync_page_tags(struct page *page, pte_t old_pte,
- bool check_swap, bool pte_is_tagged)
-{
- if (check_swap && is_swap_pte(old_pte)) {
- swp_entry_t entry = pte_to_swp_entry(old_pte);
-
- if (!non_swap_entry(entry))
- mte_restore_tags(entry, page);
- }
-
- if (!pte_is_tagged)
- return;
-
- if (try_page_mte_tagging(page)) {
- mte_clear_page_tags(page_address(page));
- set_page_mte_tagged(page);
- }
-}
-
-void mte_sync_tags(pte_t old_pte, pte_t pte)
+void mte_sync_tags(pte_t pte)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
- bool check_swap = nr_pages == 1;
- bool pte_is_tagged = pte_tagged(pte);
-
- /* Early out if there's nothing to do */
- if (!check_swap && !pte_is_tagged)
- return;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
- if (!page_mte_tagged(page)) {
- mte_sync_page_tags(page, old_pte, check_swap,
- pte_is_tagged);
+ if (try_page_mte_tagging(page)) {
+ mte_clear_page_tags(page_address(page));
set_page_mte_tagged(page);
}
}
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index cd508ba80ab1..3a78bf1b1364 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -53,10 +53,9 @@ void mte_restore_tags(swp_entry_t entry, struct page *page)
if (!tags)
return;
- if (try_page_mte_tagging(page)) {
- mte_restore_page_tags(page_address(page), tags);
- set_page_mte_tagged(page);
- }
+ WARN_ON_ONCE(!try_page_mte_tagging(page));
+ mte_restore_page_tags(page_address(page), tags);
+ set_page_mte_tagged(page);
}
void mte_invalidate_tags(int type, pgoff_t offset)
--
2.40.1.606.ga4b1b128d6-goog
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 3ce29c17dc847bf4245e16aad78a7617afa96297
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023050749-deskwork-snowboard-82cf@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
3ce29c17dc84 ("igc: read before write to SRRCTL register")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3ce29c17dc847bf4245e16aad78a7617afa96297 Mon Sep 17 00:00:00 2001
From: Song Yoong Siang <yoong.siang.song(a)intel.com>
Date: Tue, 2 May 2023 08:48:06 -0700
Subject: [PATCH] igc: read before write to SRRCTL register
igc_configure_rx_ring() function will be called as part of XDP program
setup. If Rx hardware timestamp is enabled prio to XDP program setup,
this timestamp enablement will be overwritten when buffer size is
written into SRRCTL register.
Thus, this commit read the register value before write to SRRCTL
register. This commit is tested by using xdp_hw_metadata bpf selftest
tool. The tool enables Rx hardware timestamp and then attach XDP program
to igc driver. It will display hardware timestamp of UDP packet with
port number 9092. Below are detail of test steps and results.
Command on DUT:
sudo ./xdp_hw_metadata <interface name>
Command on Link Partner:
echo -n skb | nc -u -q1 <destination IPv4 addr> 9092
Result before this patch:
skb hwtstamp is not found!
Result after this patch:
found skb hwtstamp = 1677800973.642836757
Optionally, read PHC to confirm the values obtained are almost the same:
Command:
sudo ./testptp -d /dev/ptp0 -g
Result:
clock time: 1677800973.913598978 or Fri Mar 3 07:49:33 2023
Fixes: fc9df2a0b520 ("igc: Enable RX via AF_XDP zero-copy")
Cc: <stable(a)vger.kernel.org> # 5.14+
Signed-off-by: Song Yoong Siang <yoong.siang.song(a)intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller(a)intel.com>
Reviewed-by: Jesper Dangaard Brouer <brouer(a)redhat.com>
Tested-by: Jesper Dangaard Brouer <brouer(a)redhat.com>
Tested-by: Naama Meir <naamax.meir(a)linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
Reviewed-by: Leon Romanovsky <leonro(a)nvidia.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index 7a992befca24..9f3827eda157 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -87,8 +87,13 @@ union igc_adv_rx_desc {
#define IGC_RXDCTL_SWFLUSH 0x04000000 /* Receive Software Flush */
/* SRRCTL bit definitions */
-#define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */
-#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */
-#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define IGC_SRRCTL_BSIZEPKT_MASK GENMASK(6, 0)
+#define IGC_SRRCTL_BSIZEPKT(x) FIELD_PREP(IGC_SRRCTL_BSIZEPKT_MASK, \
+ (x) / 1024) /* in 1 KB resolution */
+#define IGC_SRRCTL_BSIZEHDR_MASK GENMASK(13, 8)
+#define IGC_SRRCTL_BSIZEHDR(x) FIELD_PREP(IGC_SRRCTL_BSIZEHDR_MASK, \
+ (x) / 64) /* in 64 bytes resolution */
+#define IGC_SRRCTL_DESCTYPE_MASK GENMASK(27, 25)
+#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF FIELD_PREP(IGC_SRRCTL_DESCTYPE_MASK, 1)
#endif /* _IGC_BASE_H */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ba49728be919..1c4676882082 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -640,8 +640,11 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
else
buf_size = IGC_RXBUFFER_2048;
- srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
- srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
+ srrctl = rd32(IGC_SRRCTL(reg_idx));
+ srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK |
+ IGC_SRRCTL_DESCTYPE_MASK);
+ srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN);
+ srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size);
srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
wr32(IGC_SRRCTL(reg_idx), srrctl);
This is a backport of the CR0.WP KVM series[1] to Linux v5.15. It
differs from the v6.1 backport as in needing additional prerequisite
patches from Lai Jiangshan (and fixes for those) to ensure the
assumption it's safe to let CR0.WP be a guest owned bit still stand.
I used 'ssdd 10 50000' from rt-tests[2] as a micro-benchmark, running on
a grsecurity L1 VM. Below table shows the results (runtime in seconds,
lower is better):
legacy TDP shadow
Linux v5.15.106 9.94s 66.1s 64.9s
+ patches 4.81s 4.79s 64.6s
It's interesting to see that using the TDP MMU is even slower than
shadow paging on a vanilla kernel, making the impact of this backport
even more significant.
The KVM unit test suite showed no regressions.
Please consider applying.
Thanks,
Mathias
[1] https://lore.kernel.org/kvm/20230322013731.102955-1-minipli@grsecurity.net/
[2] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git
Lai Jiangshan (3):
KVM: X86: Don't reset mmu context when X86_CR4_PCIDE 1->0
KVM: X86: Don't reset mmu context when toggling X86_CR4_PGE
KVM: x86/mmu: Reconstruct shadow page root if the guest PDPTEs is
changed
Mathias Krause (3):
KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP
enabled
KVM: x86: Make use of kvm_read_cr*_bits() when testing bits
KVM: VMX: Make CR0.WP a guest owned bit
Paolo Bonzini (1):
KVM: x86/mmu: Avoid indirect call for get_cr3
Sean Christopherson (1):
KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated permission
faults
arch/x86/kvm/kvm_cache_regs.h | 2 +-
arch/x86/kvm/mmu.h | 42 ++++++++++++++++++++++++++++++----
arch/x86/kvm/mmu/mmu.c | 27 +++++++++++++++++-----
arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
arch/x86/kvm/pmu.c | 4 ++--
arch/x86/kvm/vmx/nested.c | 4 ++--
arch/x86/kvm/vmx/vmx.c | 6 ++---
arch/x86/kvm/vmx/vmx.h | 18 +++++++++++++++
arch/x86/kvm/x86.c | 27 +++++++++++++++++++---
9 files changed, 110 insertions(+), 22 deletions(-)
--
2.39.2
From: Oliver Hartkopp <socketcan(a)hartkopp.net>
The control message provided by J1939 support MSG_CMSG_COMPAT but
blocked recvmsg() syscalls that have set this flag, i.e. on 32bit user
space on 64 bit kernels.
Link: https://github.com/hartkopp/can-isotp/issues/59
Cc: Oleksij Rempel <o.rempel(a)pengutronix.de>
Suggested-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
Signed-off-by: Oliver Hartkopp <socketcan(a)hartkopp.net>
Tested-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
Acked-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Link: https://lore.kernel.org/20230505110308.81087-3-mkl@pengutronix.de
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
net/can/j1939/socket.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 7e90f9e61d9b..1790469b2580 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -798,7 +798,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
struct j1939_sk_buff_cb *skcb;
int ret = 0;
- if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE))
+ if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE | MSG_CMSG_COMPAT))
return -EINVAL;
if (flags & MSG_ERRQUEUE)
--
2.39.2
The patch titled
Subject: nilfs2: fix incomplete buffer cleanup in nilfs_btnode_abort_change_key()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-fix-incomplete-buffer-cleanup-in-nilfs_btnode_abort_change_key.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix incomplete buffer cleanup in nilfs_btnode_abort_change_key()
Date: Sat, 13 May 2023 19:24:28 +0900
A syzbot fault injection test reported that nilfs_btnode_create_block, a
helper function that allocates a new node block for b-trees, causes a
kernel BUG for disk images where the file system block size is smaller
than the page size.
This was due to unexpected flags on the newly allocated buffer head, and
it turned out to be because the buffer flags were not cleared by
nilfs_btnode_abort_change_key() after an error occurred during a b-tree
update operation and the buffer was later reused in that state.
Fix this issue by using nilfs_btnode_delete() to abandon the unused
preallocated buffer in nilfs_btnode_abort_change_key().
Link: https://lkml.kernel.org/r/20230513102428.10223-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+b0a35a5c1f7e846d3b09(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/000000000000d1d6c205ebc4d512@google.com
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/btnode.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
--- a/fs/nilfs2/btnode.c~nilfs2-fix-incomplete-buffer-cleanup-in-nilfs_btnode_abort_change_key
+++ a/fs/nilfs2/btnode.c
@@ -285,6 +285,14 @@ void nilfs_btnode_abort_change_key(struc
if (nbh == NULL) { /* blocksize == pagesize */
xa_erase_irq(&btnc->i_pages, newkey);
unlock_page(ctxt->bh->b_page);
- } else
- brelse(nbh);
+ } else {
+ /*
+ * When canceling a buffer that a prepare operation has
+ * allocated to copy a node block to another location, use
+ * nilfs_btnode_delete() to initialize and release the buffer
+ * so that the buffer flags will not be in an inconsistent
+ * state when it is reallocated.
+ */
+ nilfs_btnode_delete(nbh);
+ }
}
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-fix-use-after-free-bug-of-nilfs_root-in-nilfs_evict_inode.patch
nilfs2-fix-incomplete-buffer-cleanup-in-nilfs_btnode_abort_change_key.patch