On Tue, 2023-11-14 at 20:05 +0000, Mark Brown wrote:
> diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
> index 59e15dd8d0f8..7ffe90010587 100644
> --- a/arch/x86/kernel/shstk.c
> +++ b/arch/x86/kernel/shstk.c
> @@ -191,18 +191,38 @@ void reset_thread_features(void)
>  	current->thread.features_locked = 0;
>  }
>  
> -unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
> -				       unsigned long stack_size)
> +unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
> +				       const struct kernel_clone_args *args)
>  {
>  	struct thread_shstk *shstk = &tsk->thread.shstk;
> +	unsigned long clone_flags = args->flags;
>  	unsigned long addr, size;
>  
>  	/*
>  	 * If shadow stack is not enabled on the new thread, skip any
> -	 * switch to a new shadow stack.
> +	 * implicit switch to a new shadow stack and reject attempts to
> +	 * explciitly specify one.
>  	 */
> -	if (!features_enabled(ARCH_SHSTK_SHSTK))
> +	if (!features_enabled(ARCH_SHSTK_SHSTK)) {
> +		if (args->shadow_stack)
> +			return (unsigned long)ERR_PTR(-EINVAL);
>  		return 0;
> +	}
> +
> +	/*
> +	 * If the user specified a shadow stack then do some basic
> +	 * validation and use it. The caller is responsible for
> +	 * freeing the shadow stack.
> +	 */
> +	if (args->shadow_stack_size) {
> +		size = args->shadow_stack_size;
> +		if (size < 8)
> +			return (unsigned long)ERR_PTR(-EINVAL);
What is the intention here? The check in map_shadow_stack is to leave space for the token, but here there is no token.
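For comparison, the existing map_shadow_stack() check ties the 8 byte minimum to actually writing a restore token, roughly like the sketch below (the helper name is made up for illustration, the real code does this inline in the syscall; SHADOW_STACK_SET_TOKEN is the real uapi flag):

	/*
	 * Rough sketch of the existing map_shadow_stack() logic: the 8 byte
	 * minimum is only enforced when a restore token will be written at
	 * the top of the new shadow stack.
	 */
	static int shstk_size_ok(unsigned long size, unsigned int flags)
	{
		bool set_tok = flags & SHADOW_STACK_SET_TOKEN;

		/* If there isn't space for a token */
		if (set_tok && size < 8)
			return -ENOSPC;

		return 0;
	}

Since the clone3() path never writes a token into the new stack, it's not obvious what the equivalent minimum should be here.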
I think for CLONE_VM we should not require a non-zero size.

Speaking of CLONE_VM, we should probably be clear on what the expected behavior is in the cases where a new shadow stack is not normally allocated: !CLONE_VM || CLONE_VFORK will use the existing shadow stack. Should we require shadow_stack_size to be zero in that case, or just ignore it? I'd lean towards requiring zero so userspace doesn't pass in garbage that we then have to keep accommodating later. What exactly we might need to accommodate, though, I'm not sure. What do you think?
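Roughly the kind of check I have in mind, as an untested sketch on top of this patch's shstk_alloc_thread_stack(), placed before the size is picked:

	/*
	 * Sketch only: if the child will reuse an existing shadow stack
	 * (!CLONE_VM, or CLONE_VFORK sharing the parent's), reject a user
	 * supplied size rather than silently ignoring it.
	 */
	if ((!(clone_flags & CLONE_VM) || (clone_flags & CLONE_VFORK)) &&
	    args->shadow_stack_size)
		return (unsigned long)ERR_PTR(-EINVAL);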
> +	} else {
> +		size = args->stack_size;
> +	}
>  
>  	/*
>  	 * For CLONE_VFORK the child will share the parents shadow stack.
> @@ -222,7 +242,7 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long cl
>  	if (!(clone_flags & CLONE_VM))
>  		return 0;
>  
> -	size = adjust_shstk_size(stack_size);
> +	size = adjust_shstk_size(size);
>  	addr = alloc_shstk(0, size, 0, false);
>  	if (IS_ERR_VALUE(addr))
>  		return addr;
> diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
> index a23af225c898..94e7cf62be51 100644
> --- a/include/linux/sched/task.h
> +++ b/include/linux/sched/task.h
> @@ -41,6 +41,8 @@ struct kernel_clone_args {
>  	void *fn_arg;
>  	struct cgroup *cgrp;
>  	struct css_set *cset;
> +	unsigned long shadow_stack;
Was this ^ left in accidentally? Elsewhere in this patch it is getting checked too.
> +	unsigned long shadow_stack_size;
>  };
>  
>  /*
> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> index 3bac0a8ceab2..a998b6d0c897 100644
> --- a/include/uapi/linux/sched.h
> +++ b/include/uapi/linux/sched.h
> @@ -84,6 +84,8 @@
>   * kernel's limit of nested PID namespaces.
>   * @cgroup: If CLONE_INTO_CGROUP is specified set this to
>   *          a file descriptor for the cgroup.
> + * @shadow_stack_size: Specify the size of the shadow stack to allocate
> + *                     for the child process.
>   *
>   * The structure is versioned by size and thus extensible.
>   * New struct members must go at the end of the struct and
> @@ -101,12 +103,14 @@ struct clone_args {
>  	__aligned_u64 set_tid;
>  	__aligned_u64 set_tid_size;
>  	__aligned_u64 cgroup;
> +	__aligned_u64 shadow_stack_size;
>  };
>  #endif
>  
>  #define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
>  #define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
>  #define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
> +#define CLONE_ARGS_SIZE_VER3 96 /* sizeof fourth published struct */
>  
>  /*
>   * Scheduling policies
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 10917c3e1f03..b0df69c8185e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -3067,7 +3067,9 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
>  		     CLONE_ARGS_SIZE_VER1);
>  	BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) !=
>  		     CLONE_ARGS_SIZE_VER2);
> -	BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
> +	BUILD_BUG_ON(offsetofend(struct clone_args, shadow_stack_size) !=
> +		     CLONE_ARGS_SIZE_VER3);
> +	BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER3);
>  
>  	if (unlikely(usize > PAGE_SIZE))
>  		return -E2BIG;
> @@ -3110,6 +3112,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
>  		.tls		= args.tls,
>  		.set_tid_size	= args.set_tid_size,
>  		.cgroup		= args.cgroup,
> +		.shadow_stack_size = args.shadow_stack_size,
>  	};
>  
>  	if (args.set_tid &&
> @@ -3150,6 +3153,23 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
>  	return true;
>  }
>  
> +/**
> + * clone3_shadow_stack_valid - check and prepare shadow stack
> + * @kargs: kernel clone args
> + *
> + * Verify that shadow stacks are only enabled if supported.
> + */
> +static inline bool clone3_shadow_stack_valid(struct kernel_clone_args *kargs)
> +{
> +#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK
> +	/* The architecture must check support on the specific machine */
> +	return true;
> +#else
> +	/* The architecture does not support shadow stacks */
> +	return !kargs->shadow_stack_size;
> +#endif
This might be simpler:

	return IS_ENABLED(CONFIG_ARCH_HAS_USER_SHADOW_STACK) ||
	       !kargs->shadow_stack_size;
> +}
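i.e. with IS_ENABLED() the whole helper could collapse to something like this (untested sketch):

static inline bool clone3_shadow_stack_valid(struct kernel_clone_args *kargs)
{
	/*
	 * Architectures that don't select ARCH_HAS_USER_SHADOW_STACK must
	 * only ever see a zero size; otherwise leave the detailed checks
	 * to the architecture code.
	 */
	return IS_ENABLED(CONFIG_ARCH_HAS_USER_SHADOW_STACK) ||
	       !kargs->shadow_stack_size;
}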
> +
>  static bool clone3_args_valid(struct kernel_clone_args *kargs)
>  {
>  	/* Verify that no unknown flags are passed along. */
> @@ -3172,7 +3192,7 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
>  	    kargs->exit_signal)
>  		return false;
>  
> -	if (!clone3_stack_valid(kargs))
> +	if (!clone3_stack_valid(kargs) || !clone3_shadow_stack_valid(kargs))
>  		return false;
>  
>  	return true;