Creating a series of detached mounts, attaching them to the filesystem,
and unmounting them can be used to trigger an integer overflow in
ns->mounts causing the kernel to block any new mounts in count_mounts()
and returning ENOSPC because it falsely assumes that the maximum number
of mounts in the mount namespace has been reached, i.e. it thinks it
can't fit the new mounts into the mount namespace anymore.
Depending on the number of mounts in your system, this can be reproduced
on any kernel that supportes open_tree() and move_mount() with the
following instructions:
1. Compile the following program "repro.c" via "make repro"
> cat repro.c
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/mount.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
/* open_tree() */
#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1
#endif
#ifndef OPEN_TREE_CLOEXEC
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif
#ifndef __NR_open_tree
#if defined __alpha__
#define __NR_open_tree 538
#elif defined _MIPS_SIM
#if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
#define __NR_open_tree 4428
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
#define __NR_open_tree 6428
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
#define __NR_open_tree 5428
#endif
#elif defined __ia64__
#define __NR_open_tree (428 + 1024)
#else
#define __NR_open_tree 428
#endif
#endif
/* move_mount() */
#ifndef MOVE_MOUNT_F_EMPTY_PATH
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
#endif
#ifndef __NR_move_mount
#if defined __alpha__
#define __NR_move_mount 539
#elif defined _MIPS_SIM
#if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
#define __NR_move_mount 4429
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
#define __NR_move_mount 6429
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
#define __NR_move_mount 5429
#endif
#elif defined __ia64__
#define __NR_move_mount (428 + 1024)
#else
#define __NR_move_mount 429
#endif
#endif
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
return syscall(__NR_open_tree, dfd, filename, flags);
}
static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
const char *to_pathname, unsigned int flags)
{
return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
}
static void usage(void)
{
const char *text = "mount-new [--recursive] <source> <target>\n";
fprintf(stderr, "%s", text);
_exit(EXIT_SUCCESS);
}
#define exit_usage(format, ...) \
({ \
fprintf(stderr, format, ##__VA_ARGS__); \
usage(); \
})
#define exit_log(format, ...) \
({ \
fprintf(stderr, format, ##__VA_ARGS__); \
exit(EXIT_FAILURE); \
})
static const struct option longopts[] = {
{"recursive", no_argument, 0, 'a'},
{"help", no_argument, 0, 'b'},
{ NULL, no_argument, 0, 0 },
};
int main(int argc, char *argv[])
{
int index = 0;
bool recursive = false;
int fd_tree, new_argc, ret;
char *source, *target;
char *const *new_argv;
while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
switch (ret) {
case 'a':
recursive = true;
break;
case 'b':
/* fallthrough */
default:
usage();
}
}
new_argv = &argv[optind];
new_argc = argc - optind;
if (new_argc < 2)
exit_usage("Missing source or target mountpoint\n\n");
source = new_argv[0];
target = new_argv[1];
fd_tree = sys_open_tree(-EBADF, source,
OPEN_TREE_CLONE |
OPEN_TREE_CLOEXEC |
AT_EMPTY_PATH |
(recursive ? AT_RECURSIVE : 0));
if (fd_tree < 0) {
exit_log("%m - Failed to open %s\n", source);
exit(EXIT_FAILURE);
}
ret = sys_move_mount(fd_tree, "", -EBADF, target, MOVE_MOUNT_F_EMPTY_PATH);
if (ret < 0)
exit_log("%m - Failed to attach mount to %s\n", target);
close(fd_tree);
exit(EXIT_SUCCESS);
}
2. Run a loop like:
while true; do sudo ./repro; done
and wait for the kernel to refuse any new mounts by returning ENOSPC.
Depending on the number of mounts you have on the system this might take
shorter or longer.
The root cause of this is that detached mounts aren't handled correctly
when the destination mount point is MS_SHARED causing a borked mount
tree and ultimately to a miscalculation of the number of mounts in the
mount namespace.
Detached mounts created via
open_tree(fd, path, OPEN_TREE_CLONE)
are essentially like an unattached new mount, or an unattached
bind-mount. They can then later on be attached to the filesystem via
move_mount() which calles into attach_recursive_mount(). Part of
attaching it to the filesystem is making sure that mounts get correctly
propagated in case the destination mountpoint is MS_SHARED, i.e. is a
shared mountpoint. This is done by calling into propagate_mnt() which
walks the list of peers calling propagate_one() on each mount in this
list making sure it receives the propagation event.
For new mounts and bind-mounts propagate_one() knows to skip them by
realizing that there's no mount namespace attached to them.
However, detached mounts have an anonymous mount namespace attached to
them and so they aren't skipped causing the mount table to get wonky and
ultimately preventing any new mounts via the ENOSPC issue.
So teach propagation to handle detached mounts by making it aware of
them. I've been tracking this issue down for the last couple of days by
unmounting everything in my current mount table leaving only /proc and
/sys mounted and running the reproducer above verifying the counting of
the mounts. With this fix the counts are correct and the ENOSPC issue
can't be reproduced.
Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around")
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: linux-fsdevel(a)vger.kernel.org
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Christian Brauner <christian.brauner(a)ubuntu.com>
---
fs/pnode.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/pnode.h b/fs/pnode.h
index 26f74e092bd9..988f1aa9b02a 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -12,7 +12,7 @@
#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
#define IS_MNT_SLAVE(m) ((m)->mnt_master)
-#define IS_MNT_NEW(m) (!(m)->mnt_ns)
+#define IS_MNT_NEW(m) (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns))
#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
base-commit: f69d02e37a85645aa90d18cacfff36dba370f797
--
2.27.0
On Thu, Mar 4, 2021 at 12:23 AM Andrew Morton <akpm(a)linux-foundation.org> wrote:
>
> On Fri, 26 Feb 2021 02:25:37 +0100 Andrey Konovalov <andreyknvl(a)google.com> wrote:
>
> > Currently, kasan_free_nondeferred_pages()->kasan_free_pages() is called
> > after debug_pagealloc_unmap_pages(). This causes a crash when
> > debug_pagealloc is enabled, as HW_TAGS KASAN can't set tags on an
> > unmapped page.
> >
> > This patch puts kasan_free_nondeferred_pages() before
> > debug_pagealloc_unmap_pages().
> >
> > Besides fixing the crash, this also makes the annotation order consistent
> > with debug_pagealloc_map_pages() preceding kasan_alloc_pages().
> >
>
> This bug exists in 5.12, does it not?
>
> If so, is cc:stable appropriate and if so, do we have a suitable Fixes:
> commit?
Sure:
Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS")
Cc: <stable(a)vger.kernel.org>
EVM_ALLOW_METADATA_WRITES is an EVM initialization flag that can be set to
temporarily disable metadata verification until all xattrs/attrs necessary
to verify an EVM portable signature are copied to the file. This flag is
cleared when EVM is initialized with an HMAC key, to avoid that the HMAC is
calculated on unverified xattrs/attrs.
Currently EVM unnecessarily denies setting this flag if EVM is initialized
with a public key, which is not a concern as it cannot be used to trust
xattrs/attrs updates. This patch removes this limitation.
Cc: stable(a)vger.kernel.org # 4.16.x
Fixes: ae1ba1676b88e ("EVM: Allow userland to permit modification of EVM-protected metadata")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
Documentation/ABI/testing/evm | 5 +++--
security/integrity/evm/evm_secfs.c | 4 ++--
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
index 3c477ba48a31..eb6d70fd6fa2 100644
--- a/Documentation/ABI/testing/evm
+++ b/Documentation/ABI/testing/evm
@@ -49,8 +49,9 @@ Description:
modification of EVM-protected metadata and
disable all further modification of policy
- Note that once a key has been loaded, it will no longer be
- possible to enable metadata modification.
+ Note that once an HMAC key has been loaded, it will no longer
+ be possible to enable metadata modification and, if it is
+ already enabled, it will be disabled.
Until key loading has been signaled EVM can not create
or validate the 'security.evm' xattr, but returns
diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index bbc85637e18b..197a4b83e534 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -81,10 +81,10 @@ static ssize_t evm_write_key(struct file *file, const char __user *buf,
return -EINVAL;
/* Don't allow a request to freshly enable metadata writes if
- * keys are loaded.
+ * an HMAC key is loaded.
*/
if ((i & EVM_ALLOW_METADATA_WRITES) &&
- ((evm_initialized & EVM_KEY_MASK) != 0) &&
+ ((evm_initialized & EVM_INIT_HMAC) != 0) &&
!(evm_initialized & EVM_ALLOW_METADATA_WRITES))
return -EPERM;
--
2.26.2
evm_inode_init_security() requires an HMAC key to calculate the HMAC on
initial xattrs provided by LSMs. However, it checks generically whether a
key has been loaded, including also public keys, which is not correct as
public keys are not suitable to calculate the HMAC.
Originally, support for signature verification was introduced to verify a
possibly immutable initial ram disk, when no new files are created, and to
switch to HMAC for the root filesystem. By that time, an HMAC key should
have been loaded and usable to calculate HMACs for new files.
More recently support for requiring an HMAC key was removed from the
kernel, so that signature verification can be used alone. Since this is a
legitimate use case, evm_inode_init_security() should not return an error
when no HMAC key has been loaded.
This patch fixes this problem by replacing the evm_key_loaded() check with
a check of the EVM_INIT_HMAC flag in evm_initialized.
Cc: stable(a)vger.kernel.org # 4.5.x
Fixes: 26ddabfe96b ("evm: enable EVM when X509 certificate is loaded")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Mimi Zohar <zohar(a)linux.ibm.com>
---
security/integrity/evm/evm_main.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 0de367aaa2d3..7ac5204c8d1f 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -521,7 +521,7 @@ void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
}
/*
- * evm_inode_init_security - initializes security.evm
+ * evm_inode_init_security - initializes security.evm HMAC value
*/
int evm_inode_init_security(struct inode *inode,
const struct xattr *lsm_xattr,
@@ -530,7 +530,8 @@ int evm_inode_init_security(struct inode *inode,
struct evm_xattr *xattr_data;
int rc;
- if (!evm_key_loaded() || !evm_protected_xattr(lsm_xattr->name))
+ if (!(evm_initialized & EVM_INIT_HMAC) ||
+ !evm_protected_xattr(lsm_xattr->name))
return 0;
xattr_data = kzalloc(sizeof(*xattr_data), GFP_NOFS);
--
2.26.2