From: Nadav Amit namit@vmware.com
commit efd608fa7403ba106412b437f873929e2c862e28 upstream.
I encountered some occasional crashes of poke_int3_handler() when kprobes are set, while accessing desc->vec.
The text poke mechanism claims to have an RCU-like behavior, but it does not appear that there is any quiescent state to ensure that nobody holds reference to desc. As a result, the following race appears to be possible, which can lead to memory corruption.
CPU0 CPU1 ---- ---- text_poke_bp_batch() -> smp_store_release(&bp_desc, &desc)
[ notice that desc is on the stack ]
poke_int3_handler()
[ int3 might be kprobe's so sync events are do not help ]
-> try_get_desc(descp=&bp_desc) desc = __READ_ONCE(bp_desc)
if (!desc) [false, success] WRITE_ONCE(bp_desc, NULL); atomic_dec_and_test(&desc.refs)
[ success, desc space on the stack is being reused and might have non-zero value. ] arch_atomic_inc_not_zero(&desc->refs)
[ might succeed since desc points to stack memory that was freed and might be reused. ]
Fix this issue with small backportable patch. Instead of trying to make RCU-like behavior for bp_desc, just eliminate the unnecessary level of indirection of bp_desc, and hold the whole descriptor as a global. Anyhow, there is only a single descriptor at any given moment.
Fixes: 1f676247f36a4 ("x86/alternatives: Implement a better poke_int3_handler() completion scheme") Signed-off-by: Nadav Amit namit@vmware.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: stable@kernel.org Link: https://lkml.kernel.org/r/20220920224743.3089-1-namit@vmware.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- arch/x86/kernel/alternative.c | 45 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-)
--- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1200,22 +1200,23 @@ struct bp_patching_desc { atomic_t refs; };
-static struct bp_patching_desc *bp_desc; +static struct bp_patching_desc bp_desc;
static __always_inline -struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) +struct bp_patching_desc *try_get_desc(void) { - /* rcu_dereference */ - struct bp_patching_desc *desc = __READ_ONCE(*descp); + struct bp_patching_desc *desc = &bp_desc;
- if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) + if (!arch_atomic_inc_not_zero(&desc->refs)) return NULL;
return desc; }
-static __always_inline void put_desc(struct bp_patching_desc *desc) +static __always_inline void put_desc(void) { + struct bp_patching_desc *desc = &bp_desc; + smp_mb__before_atomic(); arch_atomic_dec(&desc->refs); } @@ -1248,15 +1249,15 @@ noinstr int poke_int3_handler(struct pt_
/* * Having observed our INT3 instruction, we now must observe - * bp_desc: + * bp_desc with non-zero refcount: * - * bp_desc = desc INT3 + * bp_desc.refs = 1 INT3 * WMB RMB - * write INT3 if (desc) + * write INT3 if (bp_desc.refs != 0) */ smp_rmb();
- desc = try_get_desc(&bp_desc); + desc = try_get_desc(); if (!desc) return 0;
@@ -1310,7 +1311,7 @@ noinstr int poke_int3_handler(struct pt_ ret = 1;
out_put: - put_desc(desc); + put_desc(); return ret; }
@@ -1341,18 +1342,20 @@ static int tp_vec_nr; */ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { - struct bp_patching_desc desc = { - .vec = tp, - .nr_entries = nr_entries, - .refs = ATOMIC_INIT(1), - }; unsigned char int3 = INT3_INSN_OPCODE; unsigned int i; int do_sync;
lockdep_assert_held(&text_mutex);
- smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */ + bp_desc.vec = tp; + bp_desc.nr_entries = nr_entries; + + /* + * Corresponds to the implicit memory barrier in try_get_desc() to + * ensure reading a non-zero refcount provides up to date bp_desc data. + */ + atomic_set_release(&bp_desc.refs, 1);
/* * Corresponding read barrier in int3 notifier for making sure the @@ -1440,12 +1443,10 @@ static void text_poke_bp_batch(struct te text_poke_sync();
/* - * Remove and synchronize_rcu(), except we have a very primitive - * refcount based completion. + * Remove and wait for refs to be zero. */ - WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */ - if (!atomic_dec_and_test(&desc.refs)) - atomic_cond_read_acquire(&desc.refs, !VAL); + if (!atomic_dec_and_test(&bp_desc.refs)) + atomic_cond_read_acquire(&bp_desc.refs, !VAL); }
static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,