Commit 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs") changed the handover logic of the vector identifier from ~vector in orig_ax to purely register-based. Unfortunately, this field has another consumer in the APIC code which the commit did not touch. The net result was that IRQ balancing no longer worked and instead caused interrupt storms, slowing down the system.
This patch restores the original semantics that orig_ax contains the vector. When an interrupt is received, the actual vector number is now left in the orig_ax field instead of being overwritten with -1, so that the APIC code can consume it from there.
To ensure that nobody else trips over this in the future, the patch also adds comments at strategic places to warn anyone who would refactor the code that there is another consumer of the field.
With this patch in place, IRQ balancing works as expected and performance is restored to its previous level.
Reported-by: Alex bykov <alex.bykov@scylladb.com>
Reported-by: Avi Kivity <avi@scylladb.com>
Fixes: 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs")
Cc: stable@vger.kernel.org
Signed-off-by: Alexander Graf <graf@amazon.com>
---
 arch/x86/entry/entry_32.S     |  2 +-
 arch/x86/entry/entry_64.S     | 17 +++++++++++------
 arch/x86/kernel/apic/vector.c |  2 +-
 3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index df8c017..22e829c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -727,7 +727,7 @@ SYM_CODE_START_LOCAL(asm_\cfunc) ENCODE_FRAME_POINTER movl %esp, %eax movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ - movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ + /* keep vector on stack for APIC's irq_complete_move() */ call \cfunc jmp handle_exception_return SYM_CODE_END(asm_\cfunc) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 70dea93..d78fb1c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -319,7 +319,7 @@ SYM_CODE_END(ret_from_fork) * @cfunc: C function to be called * @has_error_code: Hardware pushed error code on stack */ -.macro idtentry_body cfunc has_error_code:req +.macro idtentry_body cfunc has_error_code:req preserve_error_code:req
call error_entry UNWIND_HINT_REGS @@ -328,7 +328,9 @@ SYM_CODE_END(ret_from_fork)
.if \has_error_code == 1 movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + .if \preserve_error_code == 0 + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + .endif .endif
call \cfunc @@ -346,7 +348,7 @@ SYM_CODE_END(ret_from_fork) * The macro emits code to set up the kernel context for straight forward * and simple IDT entries. No IST stack, no paranoid entry checks. */ -.macro idtentry vector asmsym cfunc has_error_code:req +.macro idtentry vector asmsym cfunc has_error_code:req preserve_error_code=0 SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 ASM_CLAC @@ -369,7 +371,7 @@ SYM_CODE_START(\asmsym) .Lfrom_usermode_no_gap_@: .endif
- idtentry_body \cfunc \has_error_code + idtentry_body \cfunc \has_error_code \preserve_error_code
_ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) @@ -382,11 +384,14 @@ SYM_CODE_END(\asmsym) * position of idtentry exceptions, and jump to one of the two idtentry points * (common/spurious). * + * The original vector number on the stack has to stay untouched, so that the + * APIC irq_complete_move() code can access it later on IRQ ack. + * * common_interrupt is a hotpath, align it to a cache line */ .macro idtentry_irq vector cfunc .p2align CONFIG_X86_L1_CACHE_SHIFT - idtentry \vector asm_\cfunc \cfunc has_error_code=1 + idtentry \vector asm_\cfunc \cfunc has_error_code=1 preserve_error_code=1 .endm
/* @@ -440,7 +445,7 @@ SYM_CODE_START(\asmsym)
/* Switch to the regular task stack and use the noist entry point */ .Lfrom_usermode_switch_stack_@: - idtentry_body noist_\cfunc, has_error_code=0 + idtentry_body noist_\cfunc, has_error_code=0, preserve_error_code=0
_ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index dae32d9..e81b835 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -924,7 +924,7 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
void irq_complete_move(struct irq_cfg *cfg) { - __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); + __irq_complete_move(cfg, (u8)get_irq_regs()->orig_ax); }
/*