openat2 had a bug: if we pass RESOLVE_NO_XDEV, then openat2
doesn't traverse through automounts, but may still trigger them.
(See the link for full bug report with reproducer.)
This commit fixes this bug.
Link: https://lore.kernel.org/linux-fsdevel/20250817075252.4137628-1-safinaskar@z…
Fixes: fddb5d430ad9fa91b49b1 ("open: introduce openat2(2) syscall")
Reviewed-by: Aleksa Sarai <cyphar(a)cyphar.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Askar Safin <safinaskar(a)zohomail.com>
---
fs/namei.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/fs/namei.c b/fs/namei.c
index c23e5a076ab3..bfa8232b94dc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1449,6 +1449,10 @@ static int follow_automount(struct path *path, int *count, unsigned lookup_flags
dentry->d_inode)
return -EISDIR;
+ /* No need to trigger automounts if mountpoint crossing is disabled. */
+ if (lookup_flags & LOOKUP_NO_XDEV)
+ return -EXDEV;
+
if (count && (*count)++ >= MAXSYMLINKS)
return -ELOOP;
@@ -1472,6 +1476,10 @@ static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
/* Allow the filesystem to manage the transit without i_rwsem
* being held. */
if (flags & DCACHE_MANAGE_TRANSIT) {
+ if (lookup_flags & LOOKUP_NO_XDEV) {
+ ret = -EXDEV;
+ break;
+ }
ret = path->dentry->d_op->d_manage(path, false);
flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0)
--
2.47.2
Introduce and use {pgd,p4d}_populate_kernel() in core MM code when
populating PGD and P4D entries for the kernel address space.
These helpers ensure proper synchronization of page tables when
updating the kernel portion of top-level page tables.
Until now, the kernel has relied on each architecture to handle
synchronization of top-level page tables in an ad-hoc manner.
For example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for
direct mapping and vmemmap mapping changes").
However, this approach has proven fragile for following reasons:
1) It is easy to forget to perform the necessary page table
synchronization when introducing new changes.
For instance, commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory
savings for compound devmaps") overlooked the need to synchronize
page tables for the vmemmap area.
2) It is also easy to overlook that the vmemmap and direct mapping areas
must not be accessed before explicit page table synchronization.
For example, commit 8d400913c231 ("x86/vmemmap: handle unpopulated
sub-pmd ranges")) caused crashes by accessing the vmemmap area
before calling sync_global_pgds().
To address this, as suggested by Dave Hansen, introduce _kernel() variants
of the page table population helpers, which invoke architecture-specific
hooks to properly synchronize page tables. These are introduced in a new
header file, include/linux/pgalloc.h, so they can be called from common code.
They reuse existing infrastructure for vmalloc and ioremap.
Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK,
and the actual synchronization is performed by arch_sync_kernel_mappings().
This change currently targets only x86_64, so only PGD and P4D level
helpers are introduced. In theory, PUD and PMD level helpers can be added
later if needed by other architectures.
Currently this is a no-op, since no architecture sets
PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK.
Cc: <stable(a)vger.kernel.org>
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Suggested-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
---
include/linux/pgalloc.h | 24 ++++++++++++++++++++++++
include/linux/pgtable.h | 4 ++--
mm/kasan/init.c | 12 ++++++------
mm/percpu.c | 6 +++---
mm/sparse-vmemmap.c | 6 +++---
5 files changed, 38 insertions(+), 14 deletions(-)
create mode 100644 include/linux/pgalloc.h
diff --git a/include/linux/pgalloc.h b/include/linux/pgalloc.h
new file mode 100644
index 000000000000..290ab864320f
--- /dev/null
+++ b/include/linux/pgalloc.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLOC_H
+#define _LINUX_PGALLOC_H
+
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+
+static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd,
+ p4d_t *p4d)
+{
+ pgd_populate(&init_mm, pgd, p4d);
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)
+ arch_sync_kernel_mappings(addr, addr);
+}
+
+static inline void p4d_populate_kernel(unsigned long addr, p4d_t *p4d,
+ pud_t *pud)
+{
+ p4d_populate(&init_mm, p4d, pud);
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED)
+ arch_sync_kernel_mappings(addr, addr);
+}
+
+#endif /* _LINUX_PGALLOC_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index ba699df6ef69..0cf5c6c3e483 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1469,8 +1469,8 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned
/*
* Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
+ * and let generic vmalloc, ioremap and page table update code know when
+ * arch_sync_kernel_mappings() needs to be called.
*/
#ifndef ARCH_PAGE_TABLE_SYNC_MASK
#define ARCH_PAGE_TABLE_SYNC_MASK 0
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index ced6b29fcf76..8fce3370c84e 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -13,9 +13,9 @@
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/slab.h>
+#include <linux/pgalloc.h>
#include <asm/page.h>
-#include <asm/pgalloc.h>
#include "kasan.h"
@@ -191,7 +191,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
pud_t *pud;
pmd_t *pmd;
- p4d_populate(&init_mm, p4d,
+ p4d_populate_kernel(addr, p4d,
lm_alias(kasan_early_shadow_pud));
pud = pud_offset(p4d, addr);
pud_populate(&init_mm, pud,
@@ -212,7 +212,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
} else {
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
pud_init(p);
- p4d_populate(&init_mm, p4d, p);
+ p4d_populate_kernel(addr, p4d, p);
}
}
zero_pud_populate(p4d, addr, next);
@@ -251,10 +251,10 @@ int __ref kasan_populate_early_shadow(const void *shadow_start,
* puds,pmds, so pgd_populate(), pud_populate()
* is noops.
*/
- pgd_populate(&init_mm, pgd,
+ pgd_populate_kernel(addr, pgd,
lm_alias(kasan_early_shadow_p4d));
p4d = p4d_offset(pgd, addr);
- p4d_populate(&init_mm, p4d,
+ p4d_populate_kernel(addr, p4d,
lm_alias(kasan_early_shadow_pud));
pud = pud_offset(p4d, addr);
pud_populate(&init_mm, pud,
@@ -273,7 +273,7 @@ int __ref kasan_populate_early_shadow(const void *shadow_start,
if (!p)
return -ENOMEM;
} else {
- pgd_populate(&init_mm, pgd,
+ pgd_populate_kernel(addr, pgd,
early_alloc(PAGE_SIZE, NUMA_NO_NODE));
}
}
diff --git a/mm/percpu.c b/mm/percpu.c
index d9cbaee92b60..a56f35dcc417 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3108,7 +3108,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
#endif /* BUILD_EMBED_FIRST_CHUNK */
#ifdef BUILD_PAGE_FIRST_CHUNK
-#include <asm/pgalloc.h>
+#include <linux/pgalloc.h>
#ifndef P4D_TABLE_SIZE
#define P4D_TABLE_SIZE PAGE_SIZE
@@ -3134,13 +3134,13 @@ void __init __weak pcpu_populate_pte(unsigned long addr)
if (pgd_none(*pgd)) {
p4d = memblock_alloc_or_panic(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
- pgd_populate(&init_mm, pgd, p4d);
+ pgd_populate_kernel(addr, pgd, p4d);
}
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
pud = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- p4d_populate(&init_mm, p4d, pud);
+ p4d_populate_kernel(addr, p4d, pud);
}
pud = pud_offset(p4d, addr);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 41aa0493eb03..dbd8daccade2 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -27,9 +27,9 @@
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
+#include <linux/pgalloc.h>
#include <asm/dma.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "hugetlb_vmemmap.h"
@@ -229,7 +229,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
if (!p)
return NULL;
pud_init(p);
- p4d_populate(&init_mm, p4d, p);
+ p4d_populate_kernel(addr, p4d, p);
}
return p4d;
}
@@ -241,7 +241,7 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
- pgd_populate(&init_mm, pgd, p);
+ pgd_populate_kernel(addr, pgd, p);
}
return pgd;
}
--
2.43.0
Commit ID: 9cd51eefae3c871440b93c03716c5398f41bdf78
Why it should be applied: This is a small addition to a quirk list for which I
forgot to set cc: stable when originally submitted it. Bringing it onto stable
will result in several downstream distributions automatically adopting the
patch, helping the affected device.
What kernel versions I wish it to be applied to: It should apply cleanly to
6.1.y and newer longterm releases.
I hope I correctly followed
https://docs.kernel.org/process/stable-kernel-rules.html#option-2 to bring this
into stable.
ENODATA (aka ENOATTR) has a very specific meaning in the xfs xattr code;
namely, that the requested attribute name could not be found.
However, a medium error from disk may also return ENODATA. At best,
this medium error may escape to userspace as "attribute not found"
when in fact it's an IO (disk) error.
At worst, we may oops in xfs_attr_leaf_get() when we do:
error = xfs_attr_leaf_hasname(args, &bp);
if (error == -ENOATTR) {
xfs_trans_brelse(args->trans, bp);
return error;
}
because an ENODATA/ENOATTR error from disk leaves us with a null bp,
and the xfs_trans_brelse will then null-deref it.
As discussed on the list, we really need to modify the lower level
IO functions to trap all disk errors and ensure that we don't let
unique errors like this leak up into higher xfs functions - many
like this should be remapped to EIO.
However, this patch directly addresses a reported bug in the xattr
code, and should be safe to backport to stable kernels. A larger-scope
patch to handle more unique errors at lower levels can follow later.
(Note, prior to 07120f1abdff we did not oops, but we did return the
wrong error code to userspace.)
Signed-off-by: Eric Sandeen <sandeen(a)redhat.com>
Fixes: 07120f1abdff ("xfs: Add xfs_has_attr and subroutines")
Cc: <stable(a)vger.kernel.org> # v5.9+
---
(I get it that sprinkling this around to 3 places might have an ick
factor but I think it's necessary to make a surgical strike on this bug
before we address the general problem.)
Thanks,
-Eric
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index fddb55605e0c..9b54cfb0e13d 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -478,6 +478,12 @@ xfs_attr3_leaf_read(
err = xfs_da_read_buf(tp, dp, bno, 0, bpp, XFS_ATTR_FORK,
&xfs_attr3_leaf_buf_ops);
+ /*
+ * ENODATA from disk implies a disk medium failure; ENODATA for
+ * xattrs means attribute not found, so disambiguate that here.
+ */
+ if (err == -ENODATA)
+ err = -EIO;
if (err || !(*bpp))
return err;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 4c44ce1c8a64..bff3dc226f81 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -435,6 +435,13 @@ xfs_attr_rmtval_get(
0, &bp, &xfs_attr3_rmt_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK);
+ /*
+ * ENODATA from disk implies a disk medium failure;
+ * ENODATA for xattrs means attribute not found, so
+ * disambiguate that here.
+ */
+ if (error == -ENODATA)
+ error = -EIO;
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 17d9e6154f19..723a0643b838 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2833,6 +2833,12 @@ xfs_da_read_buf(
&bp, ops);
if (xfs_metadata_is_sick(error))
xfs_dirattr_mark_sick(dp, whichfork);
+ /*
+ * ENODATA from disk implies a disk medium failure; ENODATA for
+ * xattrs means attribute not found, so disambiguate that here.
+ */
+ if (error == -ENODATA && whichfork == XFS_ATTR_FORK)
+ error = -EIO;
if (error)
goto out_free;
object_err() reports details of an object for further debugging, such as
the freelist pointer, redzone, etc. However, if the pointer is invalid,
attempting to access object metadata can lead to a crash since it does
not point to a valid object.
In case the pointer is NULL or check_valid_pointer() returns false for
the pointer, only print the pointer value and skip accessing metadata.
Fixes: 81819f0fc828 ("SLUB core")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Li Qiong <liqiong(a)nfschina.com>
---
v2:
- rephrase the commit message, add comment for object_err().
v3:
- check object pointer in object_err().
v4:
- restore changes in alloc_consistency_checks().
v5:
- rephrase message, fix code style.
v6:
- add checking 'object' if NULL.
---
mm/slub.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/mm/slub.c b/mm/slub.c
index 31e11ef256f9..972cf2bb2ee6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1104,7 +1104,12 @@ static void object_err(struct kmem_cache *s, struct slab *slab,
return;
slab_bug(s, reason);
- print_trailer(s, slab, object);
+ if (!object || !check_valid_pointer(s, slab, object)) {
+ print_slab_info(slab);
+ pr_err("Invalid pointer 0x%p\n", object);
+ } else {
+ print_trailer(s, slab, object);
+ }
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
WARN_ON(1);
--
2.30.2
acpi_put_table() is only called when kobject_create_and_add() or
fpdt_process_subtable() fails, but not on the success path. This causes
a memory leak if initialization succeeds.
Ensure acpi_put_table() is called in all cases by adding a put_table
label and routing both success and failure paths through it. Drop the
err_subtable label since kobject_put() is only needed when
fpdt_process_subtable() fails.
Fixes: d1eb86e59be0 ("ACPI: tables: introduce support for FPDT table")
Cc: stable(a)vger.kernel.org
Signed-off-by: Zhen Ni <zhen.ni(a)easystack.cn>
---
drivers/acpi/acpi_fpdt.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c
index 271092f2700a..c8aea5bb187c 100644
--- a/drivers/acpi/acpi_fpdt.c
+++ b/drivers/acpi/acpi_fpdt.c
@@ -275,7 +275,7 @@ static int __init acpi_init_fpdt(void)
struct acpi_table_header *header;
struct fpdt_subtable_entry *subtable;
u32 offset = sizeof(*header);
- int result;
+ int result = 0;
status = acpi_get_table(ACPI_SIG_FPDT, 0, &header);
@@ -285,7 +285,7 @@ static int __init acpi_init_fpdt(void)
fpdt_kobj = kobject_create_and_add("fpdt", acpi_kobj);
if (!fpdt_kobj) {
result = -ENOMEM;
- goto err_nomem;
+ goto put_table;
}
while (offset < header->length) {
@@ -295,8 +295,10 @@ static int __init acpi_init_fpdt(void)
case SUBTABLE_S3PT:
result = fpdt_process_subtable(subtable->address,
subtable->type);
- if (result)
- goto err_subtable;
+ if (result) {
+ kobject_put(fpdt_kobj);
+ goto put_table;
+ }
break;
default:
/* Other types are reserved in ACPI 6.4 spec. */
@@ -304,11 +306,8 @@ static int __init acpi_init_fpdt(void)
}
offset += sizeof(*subtable);
}
- return 0;
-err_subtable:
- kobject_put(fpdt_kobj);
-err_nomem:
+put_table:
acpi_put_table(header);
return result;
}
--
2.20.1