Commit 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs") changed the handover logic of the vector identifier from ~vector in orig_ax to purely register based. Unfortunately, this field has another consumer in the APIC code which the commit did not touch. The net result was that IRQ balancing did not work and instead resulted in interrupt storms, slowing down the system.
This patch restores the original semantics that orig_ax contains the vector. When we receive an interrupt now, the actual vector number stays stored in the orig_ax field which then gets consumed by the APIC code.
To ensure that nobody else trips over this in the future, the patch also adds comments at strategic places to warn anyone who would refactor the code that there is another consumer of the field.
With this patch in place, IRQ balancing works as expected and performance levels are restored to previous levels.
Reported-by: Alex bykov alex.bykov@scylladb.com
Reported-by: Avi Kivity avi@scylladb.com
Fixes: 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs")
Cc: stable@vger.kernel.org
Signed-off-by: Alexander Graf graf@amazon.com
---
 arch/x86/entry/entry_32.S     |  2 +-
 arch/x86/entry/entry_64.S     | 17 +++++++++++------
 arch/x86/kernel/apic/vector.c |  2 +-
 3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017..22e829c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -727,7 +727,7 @@ SYM_CODE_START_LOCAL(asm_\cfunc)
 	ENCODE_FRAME_POINTER
 	movl	%esp, %eax
 	movl	PT_ORIG_EAX(%esp), %edx		/* get the vector from stack */
-	movl	$-1, PT_ORIG_EAX(%esp)		/* no syscall to restart */
+	/* keep vector on stack for APIC's irq_complete_move() */
 	call	\cfunc
 	jmp	handle_exception_return
 SYM_CODE_END(asm_\cfunc)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 70dea93..d78fb1c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -319,7 +319,7 @@ SYM_CODE_END(ret_from_fork)
  * @cfunc:		C function to be called
  * @has_error_code:	Hardware pushed error code on stack
  */
-.macro idtentry_body cfunc has_error_code:req
+.macro idtentry_body cfunc has_error_code:req preserve_error_code:req

 	call	error_entry
 	UNWIND_HINT_REGS
@@ -328,7 +328,9 @@ SYM_CODE_END(ret_from_fork)

 	.if \has_error_code == 1
 		movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
-		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.if \preserve_error_code == 0
+			movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.endif
 	.endif

 	call	\cfunc
@@ -346,7 +348,7 @@ SYM_CODE_END(ret_from_fork)
 * The macro emits code to set up the kernel context for straight forward
 * and simple IDT entries. No IST stack, no paranoid entry checks.
 */
-.macro idtentry vector asmsym cfunc has_error_code:req
+.macro idtentry vector asmsym cfunc has_error_code:req preserve_error_code=0
 SYM_CODE_START(\asmsym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 	ASM_CLAC
@@ -369,7 +371,7 @@ SYM_CODE_START(\asmsym)
 .Lfrom_usermode_no_gap_\@:
 	.endif

-	idtentry_body \cfunc \has_error_code
+	idtentry_body \cfunc \has_error_code \preserve_error_code

 _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)
@@ -382,11 +384,14 @@ SYM_CODE_END(\asmsym)
 * position of idtentry exceptions, and jump to one of the two idtentry points
 * (common/spurious).
 *
+ * The original vector number on the stack has to stay untouched, so that the
+ * APIC irq_complete_move() code can access it later on IRQ ack.
+ *
 * common_interrupt is a hotpath, align it to a cache line
 */
 .macro idtentry_irq vector cfunc
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
-	idtentry \vector asm_\cfunc \cfunc has_error_code=1
+	idtentry \vector asm_\cfunc \cfunc has_error_code=1 preserve_error_code=1
 .endm

 /*
@@ -440,7 +445,7 @@ SYM_CODE_START(\asmsym)

 	/* Switch to the regular task stack and use the noist entry point */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_body noist_\cfunc, has_error_code=0
+	idtentry_body noist_\cfunc, has_error_code=0, preserve_error_code=0

 _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index dae32d9..e81b835 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -924,7 +924,7 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)

 void irq_complete_move(struct irq_cfg *cfg)
 {
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
+	__irq_complete_move(cfg, (u8)get_irq_regs()->orig_ax);
 }

 /*
On Wed, Aug 26, 2020 at 01:53:57PM +0200, Alexander Graf wrote:
-.macro idtentry_body cfunc has_error_code:req
+.macro idtentry_body cfunc has_error_code:req preserve_error_code:req

 	call	error_entry
 	UNWIND_HINT_REGS
@@ -328,7 +328,9 @@ SYM_CODE_END(ret_from_fork)

 	.if \has_error_code == 1
 		movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
-		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.if \preserve_error_code == 0
+			movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.endif
 	.endif
When does this happen (has_error_code=1 && preserve_error_code=0)? I don't see any users of this macro (or idtentry) with this combination.
On 26.08.20 15:22, Josh Poimboeuf wrote:
On Wed, Aug 26, 2020 at 01:53:57PM +0200, Alexander Graf wrote:
-.macro idtentry_body cfunc has_error_code:req
+.macro idtentry_body cfunc has_error_code:req preserve_error_code:req

 	call	error_entry
 	UNWIND_HINT_REGS
@@ -328,7 +328,9 @@ SYM_CODE_END(ret_from_fork)

 	.if \has_error_code == 1
 		movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
-		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.if \preserve_error_code == 0
+			movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.endif
 	.endif
When does this happen (has_error_code=1 && preserve_error_code=0)? I don't see any users of this macro (or idtentry) with this combination.
It's well hidden in arch/x86/include/asm/idtentry.h:
#define DECLARE_IDTENTRY_ERRORCODE(vector, func)			\
	idtentry vector asm_##func func has_error_code=1

/* Simple exception entries with error code pushed by hardware */
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS,		exc_invalid_tss);
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP,		exc_segment_not_present);
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS,		exc_stack_segment);
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP,		exc_general_protection);
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC,		exc_alignment_check);

[...]

DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,	exc_page_fault);
Alex
On Wed, Aug 26 2020 at 13:53, Alexander Graf wrote:
Commit 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs") changed the handover logic of the vector identifier from ~vector in orig_ax to purely register based. Unfortunately, this field has another consumer in the APIC code which the commit did not touch. The net result was that IRQ balancing did not work and instead resulted in interrupt storms, slowing down the system.
The net result is an observation of the symptom but that does not explain what the underlying technical issue is.
This patch restores the original semantics that orig_ax contains the vector. When we receive an interrupt now, the actual vector number stays stored in the orig_ax field which then gets consumed by the APIC code.
To ensure that nobody else trips over this in the future, the patch also adds comments at strategic places to warn anyone who would refactor the code that there is another consumer of the field.
With this patch in place, IRQ balancing works as expected and performance levels are restored to previous levels.
There's a lot of 'This patch and we' in that changelog. Care to grep for 'This patch' in Documentation/process/ ?
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017..22e829c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -727,7 +727,7 @@ SYM_CODE_START_LOCAL(asm_\cfunc)
 	ENCODE_FRAME_POINTER
 	movl	%esp, %eax
 	movl	PT_ORIG_EAX(%esp), %edx		/* get the vector from stack */
-	movl	$-1, PT_ORIG_EAX(%esp)		/* no syscall to restart */
+	/* keep vector on stack for APIC's irq_complete_move() */
Yes that's fixing your observed wreckage, but it introduces a worse one.
user space -> interrupt
    push vector into orig_ax (values are in the ranges 0-127 and
    -128 to -1 for vectors 128-255, except for the system vectors
    which do not go through this code)
    handle()
    ...
    exit_to_user_mode_loop()
      arch_do_signal()
        /* Did we come from a system call? */
        if (syscall_get_nr(current, regs) >= 0) {
---> BOOM for any vector 0-127 because syscall_get_nr() resolves to regs->orig_ax
Going to be fun to debug.
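For reference, a condensed sketch of the code paths tglx points at, simplified from arch/x86/include/asm/syscall.h and the x86 signal delivery code (the real restart logic has more cases):

/* Condensed from arch/x86/include/asm/syscall.h: orig_ax doubles as
 * the "did we enter via a syscall" indicator. */
static inline long syscall_get_nr(struct task_struct *task,
				  struct pt_regs *regs)
{
	return regs->orig_ax;
}

/* Simplified restart check, inside the signal delivery path: a device
 * vector 0-127 left in orig_ax passes the >= 0 test and gets treated
 * as an interrupted syscall, rewinding ip and clobbering ax. */
if (syscall_get_nr(current, regs) >= 0) {
	switch (syscall_get_error(current, regs)) {
	case -ERESTARTNOHAND:
	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
		regs->ax = regs->orig_ax;
		regs->ip -= 2;	/* re-execute the (nonexistent) syscall insn */
		break;
	}
}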
The below nasty hack cures it, but I hate it with a passion. I'll look deeper for a sane variant.
Thanks,
tglx
---
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -246,7 +246,9 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)

 	desc = __this_cpu_read(vector_irq[vector]);
 	if (likely(!IS_ERR_OR_NULL(desc))) {
+		regs->orig_ax = (unsigned long)vector;
 		handle_irq(desc, regs);
+		regs->orig_ax = -1;
 	} else {
 		ack_APIC_irq();
On Wed, Aug 26, 2020 at 7:27 AM Thomas Gleixner tglx@linutronix.de wrote:
On Wed, Aug 26 2020 at 13:53, Alexander Graf wrote:
Commit 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs") changed the handover logic of the vector identifier from ~vector in orig_ax to purely register based. Unfortunately, this field has another consumer in the APIC code which the commit did not touch. The net result was that IRQ balancing did not work and instead resulted in interrupt storms, slowing down the system.
The net result is an observation of the symptom but that does not explain what the underlying technical issue is.
This patch restores the original semantics that orig_ax contains the vector. When we receive an interrupt now, the actual vector number stays stored in the orig_ax field which then gets consumed by the APIC code.
To ensure that nobody else trips over this in the future, the patch also adds comments at strategic places to warn anyone who would refactor the code that there is another consumer of the field.
With this patch in place, IRQ balancing works as expected and performance levels are restored to previous levels.
There's a lot of 'This patch and we' in that changelog. Care to grep for 'This patch' in Documentation/process/ ?
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017..22e829c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -727,7 +727,7 @@ SYM_CODE_START_LOCAL(asm_\cfunc)
 	ENCODE_FRAME_POINTER
 	movl	%esp, %eax
 	movl	PT_ORIG_EAX(%esp), %edx		/* get the vector from stack */
-	movl	$-1, PT_ORIG_EAX(%esp)		/* no syscall to restart */
+	/* keep vector on stack for APIC's irq_complete_move() */
Yes that's fixing your observed wreckage, but it introduces a worse one.
user space -> interrupt
    push vector into orig_ax (values are in the ranges 0-127 and
    -128 to -1 for vectors 128-255, except for the system vectors
    which do not go through this code)
    handle()
    ...
    exit_to_user_mode_loop()
      arch_do_signal()
        /* Did we come from a system call? */
        if (syscall_get_nr(current, regs) >= 0) {
---> BOOM for any vector 0-127 because syscall_get_nr() resolves to regs->orig_ax
Going to be fun to debug.
The below nasty hack cures it, but I hate it with a passion. I'll look deeper for a sane variant.
Fundamentally, the way we overload orig_ax is problematic. I have a half-written series to improve it, but my series is broken. I think it's fixable, though.
First is this patch to use some __csh bits to indicate the entry type. As far as I know, this patch is correct:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
Then I wrote this incorrect patch:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
That one is wrong because the orig_ax wreckage seems to have leaked into user ABI -- user programs think that orig_ax has certain semantics on user-visible entries.
But I think that the problem in this thread could be fixed quite nicely by the first patch, plus a new CS_ENTRY_IRQ and allocating eight bits of __csh to store the vector. Then we could read out the vector.
--Andy
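A hypothetical sketch of the scheme Andy describes (not his actual patches; the flag name, bit positions, and helpers below are invented for illustration): hardware only defines the low 16 bits of the pushed CS slot, so the high bits of pt_regs::cs could carry an entry-type tag plus the vector, provided they are stripped again before iret or before the frame is exposed to ptrace/signal handling:

#include <linux/types.h>

/* Invented layout, for illustration only */
#define CS_ENTRY_IRQ		(1UL << 48)	/* assumed tag: "IRQ entry" */
#define CS_VECTOR_SHIFT		56		/* assumed: 8 spare bits for vector */

static inline void cs_stash_vector(struct pt_regs *regs, u8 vector)
{
	regs->cs |= CS_ENTRY_IRQ | ((unsigned long)vector << CS_VECTOR_SHIFT);
}

static inline bool cs_entry_is_irq(struct pt_regs *regs)
{
	return regs->cs & CS_ENTRY_IRQ;
}

static inline u8 cs_stashed_vector(struct pt_regs *regs)
{
	return (regs->cs >> CS_VECTOR_SHIFT) & 0xff;
}

/* Must be undone before iret / before userspace can observe regs */
static inline void cs_clear_stash(struct pt_regs *regs)
{
	regs->cs &= 0xffffUL;
}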
Andy,
On Wed, Aug 26 2020 at 09:13, Andy Lutomirski wrote:
On Wed, Aug 26, 2020 at 7:27 AM Thomas Gleixner tglx@linutronix.de wrote:
The below nasty hack cures it, but I hate it with a passion. I'll look deeper for a sane variant.
Fundamentally, the way we overload orig_ax is problematic. I have a half-written series to improve it, but my series is broken. I think it's fixable, though.
First is this patch to use some __csh bits to indicate the entry type. As far as I know, this patch is correct:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
Yes, that looks about right.
Then I wrote this incorrect patch:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
That one is wrong because the orig_ax wreckage seems to have leaked into user ABI -- user programs think that orig_ax has certain semantics on user-visible entries.
Yes, orig_ax is pretty much user ABI for a very long time.
But I think that the problem in this thread could be fixed quite nicely by the first patch, plus a new CS_ENTRY_IRQ and allocating eight bits of __csh to store the vector. Then we could read out the vector.
That works. Alternatively I can just store the vector in the irq descriptor itself. That's trivial enough and can be done completely in C independent of the stuff above.
Thanks,
tglx
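A rough sketch of the alternative tglx mentions, recording the arrival vector in the per-interrupt APIC data; this was never merged (the next mail makes it unnecessary), and the field and helper names here are invented:

/* Hypothetical only: record the vector an interrupt arrived on in the
 * APIC chip data at handling time, instead of fishing it out of
 * pt_regs later. "arrived_vector" does not exist in the kernel. */
struct apic_chip_data {
	/* ... existing fields ... */
	unsigned int		arrived_vector;
};

static void note_arrived_vector(struct irq_desc *desc, unsigned int vector)
{
	struct apic_chip_data *apicd = irq_desc_get_chip_data(desc);

	apicd->arrived_vector = vector;
}

/* irq_complete_move() could then compare apicd->arrived_vector with
 * apicd->vector instead of relying on ~regs->orig_ax. */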
On Wed, Aug 26, 2020 at 10:47 AM Thomas Gleixner tglx@linutronix.de wrote:
Andy,
On Wed, Aug 26 2020 at 09:13, Andy Lutomirski wrote:
On Wed, Aug 26, 2020 at 7:27 AM Thomas Gleixner tglx@linutronix.de wrote:
The below nasty hack cures it, but I hate it with a passion. I'll look deeper for a sane variant.
Fundamentally, the way we overload orig_ax is problematic. I have a half-written series to improve it, but my series is broken. I think it's fixable, though.
First is this patch to use some __csh bits to indicate the entry type. As far as I know, this patch is correct:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
Yes, that looks about right.
Then I wrote this incorrect patch:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
That one is wrong because the orig_ax wreckage seems to have leaked into user ABI -- user programs think that orig_ax has certain semantics on user-visible entries.
Yes, orig_ax is pretty much user ABI for a very long time.
But I think that the problem in this thread could be fixed quite nicely by the first patch, plus a new CS_ENTRY_IRQ and allocating eight bits of __csh to store the vector. Then we could read out the vector.
That works. Alternatively I can just store the vector in the irq descriptor itself. That's trivial enough and can be done completely in C independent of the stuff above.
The latter sounds quite sensible to me. It does seem vaguely ridiculous to be trying to fish the vector out of pt_regs in the APIC code.
--Andy
On 26.08.2020 at 20:03, Andy Lutomirski luto@kernel.org wrote:
On Wed, Aug 26, 2020 at 10:47 AM Thomas Gleixner tglx@linutronix.de wrote:
Andy,
On Wed, Aug 26 2020 at 09:13, Andy Lutomirski wrote:
On Wed, Aug 26, 2020 at 7:27 AM Thomas Gleixner tglx@linutronix.de wrote:
The below nasty hack cures it, but I hate it with a passion. I'll look deeper for a sane variant.
Fundamentally, the way we overload orig_ax is problematic. I have a half-written series to improve it, but my series is broken. I think it's fixable, though.
First is this patch to use some __csh bits to indicate the entry type. As far as I know, this patch is correct:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
Yes, that looks about right.
Then I wrote this incorrect patch:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86...
That one is wrong because the orig_ax wreckage seems to have leaked into user ABI -- user programs think that orig_ax has certain semantics on user-visible entries.
Yes, orig_ax is pretty much user ABI for a very long time.
But I think that the problem in this thread could be fixed quite nicely by the first patch, plus a new CS_ENTRY_IRQ and allocating eight bits of __csh to store the vector. Then we could read out the vector.
That works. Alternatively I can just store the vector in the irq descriptor itself. That's trivial enough and can be done completely in C independent of the stuff above.
The latter sounds quite sensible to me. It does seem vaguely ridiculous to be trying to fish the vector out of pt_regs in the APIC code.
I like that option much better than the orig_ax hacks. Is this going to be something usable enough for stable?
Also, Thomas, will you have a look at moving the vector info? If so, I'd hold off on this patch for a bit.
Alex
On 26.08.20 16:27, Thomas Gleixner wrote:
On Wed, Aug 26 2020 at 13:53, Alexander Graf wrote:
Commit 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs") changed the handover logic of the vector identifier from ~vector in orig_ax to purely register based. Unfortunately, this field has another consumer in the APIC code which the commit did not touch. The net result was that IRQ balancing did not work and instead resulted in interrupt storms, slowing down the system.
The net result is an observation of the symptom but that does not explain what the underlying technical issue is.
This patch restores the original semantics that orig_ax contains the vector. When we receive an interrupt now, the actual vector number stays stored in the orig_ax field which then gets consumed by the APIC code.
To ensure that nobody else trips over this in the future, the patch also adds comments at strategic places to warn anyone who would refactor the code that there is another consumer of the field.
With this patch in place, IRQ balancing works as expected and performance levels are restored to previous levels.
There's a lot of 'This patch and we' in that changelog. Care to grep for 'This patch' in Documentation/process/ ?
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017..22e829c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -727,7 +727,7 @@ SYM_CODE_START_LOCAL(asm_\cfunc)
 	ENCODE_FRAME_POINTER
 	movl	%esp, %eax
 	movl	PT_ORIG_EAX(%esp), %edx		/* get the vector from stack */
-	movl	$-1, PT_ORIG_EAX(%esp)		/* no syscall to restart */
+	/* keep vector on stack for APIC's irq_complete_move() */
Yes that's fixing your observed wreckage, but it introduces a worse one.
user space -> interrupt
    push vector into orig_ax (values are in the ranges 0-127 and
    -128 to -1 for vectors 128-255, except for the system vectors
    which do not go through this code)
    handle()
    ...
    exit_to_user_mode_loop()
      arch_do_signal()
        /* Did we come from a system call? */
        if (syscall_get_nr(current, regs) >= 0) {
---> BOOM for any vector 0-127 because syscall_get_nr() resolves to regs->orig_ax
Going to be fun to debug.
Hah, that's the code flow I was looking for to understand why the value was negative in the first place. Thanks a lot for pointing it out!
The below nasty hack cures it, but I hate it with a passion. I'll look deeper for a sane variant.
An alternative (that doesn't make the code easier to read, but would fix the issue at hand) would be to use a pushq imm16 with vector | 0x8000 instead, to always make the value negative, no?
Alex
On Wed, Aug 26 2020 at 18:33, Alexander Graf wrote:
On 26.08.20 16:27, Thomas Gleixner wrote:
The below nasty hack cures it, but I hate it with a passion. I'll look deeper for a sane variant.
An alternative (that doesn't make the code easier to read, but would fix the issue at hand) would be to use a pushq imm16 with vector | 0x8000 instead, to always make the value negative, no?
Which makes each entry larger than 8 bytes, which was frowned upon before.
And it does not solve the issue that we abuse orig_ax which Andy mentioned.
Thanks,
tglx
On Wed, Aug 26 2020 at 20:30, Thomas Gleixner wrote:
And it does not solve the issue that we abuse orig_ax which Andy mentioned.
Ha! After staring some more, it's not required at all, which is the most elegant solution.
The vector check there is pointless because there is never a situation where an interrupt is moved from vector A to vector B on the same CPU.
That's a leftover from the old allocation model which supported multi-CPU affinities, but this was removed as it just created trouble for no real benefit.
Today the effective affinity is a single CPU out of the requested affinity mask. If an affinity mask change still contains the current target CPU then there is no move happening at all. It just stays on that vector on that CPU.
Thanks,
tglx
---
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -909,7 +909,7 @@ void send_cleanup_vector(struct irq_cfg
 		__send_cleanup_vector(apicd);
 }

-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+void irq_complete_move(struct irq_cfg *cfg)
 {
 	struct apic_chip_data *apicd;

@@ -917,15 +917,10 @@ static void __irq_complete_move(struct i
 	if (likely(!apicd->move_in_progress))
 		return;

-	if (vector == apicd->vector && apicd->cpu == smp_processor_id())
+	if (apicd->cpu == smp_processor_id())
 		__send_cleanup_vector(apicd);
 }

-void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
 /*
  * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
On 26.08.20 20:53, Thomas Gleixner wrote:
On Wed, Aug 26 2020 at 20:30, Thomas Gleixner wrote:
And it does not solve the issue that we abuse orig_ax which Andy mentioned.
Ha! After staring some more, it's not required at all, which is the most elegant solution.
The vector check there is pointless because there is never a situation where an interrupt is moved from vector A to vector B on the same CPU.
That's a leftover from the old allocation model which supported multi-CPU affinities, but this was removed as it just created trouble for no real benefit.
Today the effective affinity is a single CPU out of the requested affinity mask. If an affinity mask change still contains the current target CPU then there is no move happening at all. It just stays on that vector on that CPU.
Thanks,
tglx
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -909,7 +909,7 @@ void send_cleanup_vector(struct irq_cfg
 		__send_cleanup_vector(apicd);
 }

-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+void irq_complete_move(struct irq_cfg *cfg)
 {
 	struct apic_chip_data *apicd;

@@ -917,15 +917,10 @@ static void __irq_complete_move(struct i
 	if (likely(!apicd->move_in_progress))
 		return;

-	if (vector == apicd->vector && apicd->cpu == smp_processor_id())
+	if (apicd->cpu == smp_processor_id())
 		__send_cleanup_vector(apicd);
 }

-void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
 /*
  * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
As expected, this also fixes the issue at hand. Do you want to send a real patch? :)
Tested-by: Alexander Graf graf@amazon.com
Alex
Several people reported that 5.8 broke the interrupt affinity setting mechanism.
The consolidation of the entry code reused the regular exception entry code for device interrupts and changed the way how the vector number is conveyed from ptregs->orig_ax to a function argument.
The low level entry uses the hardware error code slot to push the vector number onto the stack which is retrieved from there into a function argument and the slot on stack is set to -1.
The reason for setting it to -1 is that the error code slot is at the position where pt_regs::orig_ax is. A positive value in pt_regs::orig_ax indicates that the entry came via a syscall. If it's not set to a negative value then a signal delivery on return to userspace would try to restart a syscall. But there are other places which rely on pt_regs::orig_ax being a valid indicator for syscall entry.
But setting pt_regs::orig_ax to -1 has a nasty side effect vs. the interrupt affinity setting mechanism, which was overlooked when this change was made.
Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1.
That -1 makes the check for cleanup: pt_regs::orig_ax == new_vector always false. As a consequence the interrupt is moved once, but then it cannot be moved anymore because the cleanup of the old vector never happens.
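In code terms (illustrative only, showing the pre-fix retrieval that the diff below removes):

/* With orig_ax pinned to -1 by the entry code, ~orig_ax evaluates to
 * 0, and no device interrupt ever uses vector 0 (external vectors
 * start at FIRST_EXTERNAL_VECTOR, 0x20), so the match always fails: */
__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);	/* vector == 0 */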
There would be several ways to convey the vector information to that place in the guts of the interrupt handling, but on deeper inspection it turned out that this check is pointless and a leftover from the old affinity model of X86 which supported multi-CPU affinities. Under this model it was possible that an interrupt had an old and a new vector on the same CPU, so the vector match was required.
Under the new model the effective affinity of an interrupt is always a single CPU from the requested affinity mask. If the affinity mask changes then either the interrupt stays on the CPU and on the same vector when that CPU is still in the new affinity mask or it is moved to a different CPU, but it is never moved to a different vector on the same CPU.
Ergo the cleanup check for the matching vector number is not required and can be removed which makes the dependency on pt_regs::orig_ax go away.
The remaining check for new_cpu == smp_processor_id() is completely sufficient. If it matches then the interrupt was successfully migrated and the cleanup can proceed.
For paranoia's sake add a warning into the vector assignment code to validate that the assumption of never moving to a different vector on the same CPU holds.
Reported-by: Alex bykov alex.bykov@scylladb.com
Reported-by: Avi Kivity avi@scylladb.com
Reported-by: Alexander Graf graf@amazon.com
Fixes: 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs")
Signed-off-by: Thomas Gleixner tglx@linutronix.de
Cc: stable@vger.kernel.org
---
 arch/x86/kernel/apic/vector.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -161,6 +161,7 @@ static void apic_update_vector(struct ir
 		apicd->move_in_progress = true;
 		apicd->prev_vector = apicd->vector;
 		apicd->prev_cpu = apicd->cpu;
+		WARN_ON_ONCE(apicd->cpu == newcpu);
 	} else {
 		irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
 				managed);
@@ -909,7 +910,7 @@ void send_cleanup_vector(struct irq_cfg
 		__send_cleanup_vector(apicd);
 }

-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+void irq_complete_move(struct irq_cfg *cfg)
 {
 	struct apic_chip_data *apicd;

@@ -917,15 +918,16 @@ static void __irq_complete_move(struct i
 	if (likely(!apicd->move_in_progress))
 		return;

-	if (vector == apicd->vector && apicd->cpu == smp_processor_id())
+	/*
+	 * If the interrupt arrived on the new target CPU, cleanup the
+	 * vector on the old target CPU. A vector check is not required
+	 * because an interrupt can never move from one vector to another
+	 * on the same CPU.
+	 */
+	if (apicd->cpu == smp_processor_id())
 		__send_cleanup_vector(apicd);
 }

-void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
 /*
  * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
From: Thomas Gleixner
Sent: 26 August 2020 21:22
...
Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1.
I suspect that it is much more 'racy' than that for MSI-X interrupts. On the hardware side there is an interrupt disable bit, an address and a value. To raise an interrupt the hardware must write the value to the address.
If the cpu needs to move an interrupt, both the address and value need changing, but the cpu won't write the address and value using the same TLP, so the hardware could potentially write a value to the wrong address. Worse than that, the hardware could easily only look at the address and value in the clocks after checking the interrupt is enabled. So masking the interrupt immediately prior to changing the vector info may not be enough.
It is likely that a read-back of the mask before updating the vector is enough.
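A minimal sketch of that ordering, assuming an MSI-X table entry mapped at 'entry' (the helper function is invented here, but the register offsets and mask bit follow the PCI spec MSI-X table layout, mirroring in spirit what drivers/pci/msi.c does when touching a live entry):

#include <linux/io.h>
#include <linux/kernel.h>

#define PCI_MSIX_ENTRY_LOWER_ADDR	0x0
#define PCI_MSIX_ENTRY_UPPER_ADDR	0x4
#define PCI_MSIX_ENTRY_DATA		0x8
#define PCI_MSIX_ENTRY_VECTOR_CTRL	0xc
#define PCI_MSIX_ENTRY_CTRL_MASKBIT	0x1

static void msix_retarget_entry(void __iomem *entry, u64 addr, u32 data)
{
	u32 ctrl = readl(entry + PCI_MSIX_ENTRY_VECTOR_CTRL);

	/* Mask the entry, then read the mask back so the device has
	 * provably observed it before address/data change under it. */
	writel(ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT,
	       entry + PCI_MSIX_ENTRY_VECTOR_CTRL);
	readl(entry + PCI_MSIX_ENTRY_VECTOR_CTRL);

	writel(lower_32_bits(addr), entry + PCI_MSIX_ENTRY_LOWER_ADDR);
	writel(upper_32_bits(addr), entry + PCI_MSIX_ENTRY_UPPER_ADDR);
	writel(data, entry + PCI_MSIX_ENTRY_DATA);

	writel(ctrl & ~PCI_MSIX_ENTRY_CTRL_MASKBIT,
	       entry + PCI_MSIX_ENTRY_VECTOR_CTRL);
}

As the follow-up below points out, even this only guarantees the device saw the mask; an interrupt may already be in flight to the old vector.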
David
From: David Laight
Sent: 26 August 2020 22:37
From: Thomas Gleixner
Sent: 26 August 2020 21:22
...
Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1.
I suspect that it is much more 'racy' than that for MSI-X interrupts. On the hardware side there is an interrupt disable bit, an address and a value. To raise an interrupt the hardware must write the value to the address.
If the cpu needs to move an interrupt, both the address and value need changing, but the cpu won't write the address and value using the same TLP, so the hardware could potentially write a value to the wrong address. Worse than that, the hardware could easily only look at the address and value in the clocks after checking the interrupt is enabled. So masking the interrupt immediately prior to changing the vector info may not be enough.
It is likely that a read-back of the mask before updating the vector is enough.
But not enough to assume you won't receive an interrupt after reading back that interrupts are masked.
(I've implemented the hardware side for an fpga ...)
David
On 26.08.20 23:47, David Laight wrote:
From: David Laight
Sent: 26 August 2020 22:37
From: Thomas Gleixner
Sent: 26 August 2020 21:22
...
Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1.
I suspect that it is much more 'racy' than that for MSI-X interrupts. On the hardware side there is an interrupt disable bit, an address and a value. To raise an interrupt the hardware must write the value to the address.
If the cpu needs to move an interrupt, both the address and value need changing, but the cpu won't write the address and value using the same TLP, so the hardware could potentially write a value to the wrong address. Worse than that, the hardware could easily only look at the address and value in the clocks after checking the interrupt is enabled. So masking the interrupt immediately prior to changing the vector info may not be enough.
It is likely that a read-back of the mask before updating the vector is enough.
But not enough to assume you won't receive an interrupt after reading back that interrupts are masked.
(I've implemented the hardware side for an fpga ...)
Do we actually care in this context? All we want to know here is whether a device (or irqchip in between) has actually noticed that it should post to a new vector. If we get interrupts on random other vectors in between, they would simply show up as spurious, no?
So I don't quite see where this patch makes the situation any worse than before.
Alex
From: Alexander Graf
Sent: 26 August 2020 23:53
On 26.08.20 23:47, David Laight wrote:
From: David Laight
Sent: 26 August 2020 22:37
From: Thomas Gleixner
Sent: 26 August 2020 21:22
...
Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1.
I suspect that it is much more 'racy' than that for MSI-X interrupts. On the hardware side there is an interrupt disable bit, an address and a value. To raise an interrupt the hardware must write the value to the address.
If the cpu needs to move an interrupt, both the address and value need changing, but the cpu won't write the address and value using the same TLP, so the hardware could potentially write a value to the wrong address. Worse than that, the hardware could easily only look at the address and value in the clocks after checking the interrupt is enabled. So masking the interrupt immediately prior to changing the vector info may not be enough.
It is likely that a read-back of the mask before updating the vector is enough.
But not enough to assume you won't receive an interrupt after reading back that interrupts are masked.
(I've implemented the hardware side for an fpga ...)
Do we actually care in this context? All we want to know here is whether a device (or irqchip in between) has actually noticed that it should post to a new vector. If we get interrupts on random other vectors in between, they would simply show up as spurious, no?
So I don't quite see where this patch makes the situation any worse than before.
Oh, it won't make it any worse. It just might be rather worse than anyone imagined.
David
On Wed, Aug 26 2020 at 21:37, David Laight wrote:
From: Thomas Gleixner
Sent: 26 August 2020 21:22
...
Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1.
I suspect that it is much more 'racy' than that for MSI-X interrupts. On the hardware side there is an interrupt disable bit, an address and a value. To raise an interrupt the hardware must write the value to the address.
Really?
If the cpu needs to move an interrupt, both the address and value need changing, but the cpu won't write the address and value using the same TLP, so the hardware could potentially write a value to the wrong address.
Now I understand finally why msi_set_affinity() in x86 has to be so convoluted.
Thanks a lot for the enlightenment!
tglx
From: Thomas Gleixner
Sent: 26 August 2020 23:08
...
I suspect that it is much more 'racy' than that for MSI-X interrupts. On the hardware side there is an interrupt disable bit, an address and a value. To raise an interrupt the hardware must write the value to the address.
Really?
Yep, anyone with write access to the msi-x table can get the device to write to any physical location (allowed by any IOMMU) instead of raising an interrupt.
If the cpu needs to move an interrupt, both the address and value need changing, but the cpu won't write the address and value using the same TLP, so the hardware could potentially write a value to the wrong address.
Now I understand finally why msi_set_affinity() in x86 has to be so convoluted.
Updating the registers should be much the same on all architectures. I probably should have looked at what msi_set_affinity() does before deciding which order the fpga logic should read the four 32bit registers in; but they are read in increasing order - so enable bit last.
David
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID:     e027fffff799cdd70400c5485b1a54f482255985
Gitweb:        https://git.kernel.org/tip/e027fffff799cdd70400c5485b1a54f482255985
Author:        Thomas Gleixner tglx@linutronix.de
AuthorDate:    Wed, 26 Aug 2020 22:21:44 +02:00
Committer:     Thomas Gleixner tglx@linutronix.de
CommitterDate: Thu, 27 Aug 2020 09:29:23 +02:00
x86/irq: Unbreak interrupt affinity setting
Several people reported that 5.8 broke the interrupt affinity setting mechanism.
The consolidation of the entry code reused the regular exception entry code for device interrupts and changed the way how the vector number is conveyed from ptregs->orig_ax to a function argument.
The low level entry uses the hardware error code slot to push the vector number onto the stack which is retrieved from there into a function argument and the slot on stack is set to -1.
The reason for setting it to -1 is that the error code slot is at the position where pt_regs::orig_ax is. A positive value in pt_regs::orig_ax indicates that the entry came via a syscall. If it's not set to a negative value then a signal delivery on return to userspace would try to restart a syscall. But there are other places which rely on pt_regs::orig_ax being a valid indicator for syscall entry.
But setting pt_regs::orig_ax to -1 has a nasty side effect vs. the interrupt affinity setting mechanism, which was overlooked when this change was made.
Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1.
That -1 makes the check for cleanup: pt_regs::orig_ax == new_vector always false. As a consequence the interrupt is moved once, but then it cannot be moved anymore because the cleanup of the old vector never happens.
There would be several ways to convey the vector information to that place in the guts of the interrupt handling, but on deeper inspection it turned out that this check is pointless and a leftover from the old affinity model of X86 which supported multi-CPU affinities. Under this model it was possible that an interrupt had an old and a new vector on the same CPU, so the vector match was required.
Under the new model the effective affinity of an interrupt is always a single CPU from the requested affinity mask. If the affinity mask changes then either the interrupt stays on the CPU and on the same vector when that CPU is still in the new affinity mask or it is moved to a different CPU, but it is never moved to a different vector on the same CPU.
Ergo the cleanup check for the matching vector number is not required and can be removed which makes the dependency on pt_regs::orig_ax go away.
The remaining check for new_cpu == smp_processor_id() is completely sufficient. If it matches then the interrupt was successfully migrated and the cleanup can proceed.
For paranoia's sake add a warning into the vector assignment code to validate that the assumption of never moving to a different vector on the same CPU holds.
Fixes: 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs")
Reported-by: Alex bykov alex.bykov@scylladb.com
Reported-by: Avi Kivity avi@scylladb.com
Reported-by: Alexander Graf graf@amazon.com
Signed-off-by: Thomas Gleixner tglx@linutronix.de
Tested-by: Alexander Graf graf@amazon.com
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/87wo1ltaxz.fsf@nanos.tec.linutronix.de
---
 arch/x86/kernel/apic/vector.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index dae32d9..f8a56b5 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -161,6 +161,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
 		apicd->move_in_progress = true;
 		apicd->prev_vector = apicd->vector;
 		apicd->prev_cpu = apicd->cpu;
+		WARN_ON_ONCE(apicd->cpu == newcpu);
 	} else {
 		irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
 				managed);
@@ -910,7 +911,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 		__send_cleanup_vector(apicd);
 }

-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+void irq_complete_move(struct irq_cfg *cfg)
 {
 	struct apic_chip_data *apicd;

@@ -918,15 +919,16 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
 	if (likely(!apicd->move_in_progress))
 		return;

-	if (vector == apicd->vector && apicd->cpu == smp_processor_id())
+	/*
+	 * If the interrupt arrived on the new target CPU, cleanup the
+	 * vector on the old target CPU. A vector check is not required
+	 * because an interrupt can never move from one vector to another
+	 * on the same CPU.
+	 */
+	if (apicd->cpu == smp_processor_id())
 		__send_cleanup_vector(apicd);
 }

-void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
 /*
  * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */