在 2025/1/17 12:43, Xin Li 写道:
On 1/16/2025 8:19 PM, Ethan Zhao wrote:
在 2025/1/17 9:21, H. Peter Anvin 写道:
On 1/16/25 16:37, Ethan Zhao wrote:
hpa suggested to introduce "switch_likely" for this kind of optimization on a switch statement, which is also easier to read. I measured it with a user space focus test, it does improve performance a lot. But obviously there are still a lot of work to do.
Find a way to instruct compiler to pick the right hot branch meanwhile make folks reading happy... yup, a lot of work.
It's not that complicated, believe it or not.
/* * switch(v) biased for speed in the case v == l * * Note: gcc is quite sensitive to the exact form of this * expression. */ #define switch_likely(v,l) \ switch((__typeof__(v))__builtin_expect((v),(l)))
I tried this macro as following, but got something really *weird* from gcc.
+#define switch_likely(v,l) \ + switch((__typeof__(v))__builtin_expect((v),(l)))
__visible noinstr void fred_entry_from_user(struct pt_regs *regs) { unsigned long error_code = regs->orig_ax; + unsigned short etype = regs->fred_ss.type & 0xf;
/* Invalidate orig_ax so that syscall_get_nr() works correctly */ regs->orig_ax = -1;
- switch (regs->fred_ss.type) { + switch_likely ((etype == EVENT_TYPE_EXTINT || etype == EVENT_TYPE_OTHER), etype) {
Just swap the 2 arguments, and it should be: + switch_likely (etype, EVENT_TYPE_OTHER) {
after swapped the parameters as following: +#define switch_likely(v,l) \ + switch((__typeof__(v))__builtin_expect((v),(l))) + __visible noinstr void fred_entry_from_user(struct pt_regs *regs) { unsigned long error_code = regs->orig_ax; + unsigned short etype = regs->fred_ss.type & 0xf;
/* Invalidate orig_ax so that syscall_get_nr() works correctly */ regs->orig_ax = -1;
- switch (regs->fred_ss.type) { + switch_likely (etype, (EVENT_TYPE_EXTINT == etype || EVENT_TYPE_OTHER == etype)) { case EVENT_TYPE_EXTINT: return fred_extint(regs); case EVENT_TYPE_NMI: @@ -256,11 +260,12 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs) __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs) { unsigned long error_code = regs->orig_ax; + unsigned short etype = regs->fred_ss.type & 0xf;
/* Invalidate orig_ax so that syscall_get_nr() works correctly */ regs->orig_ax = -1;
- switch (regs->fred_ss.type) { + switch_likely (etype, EVENT_TYPE_EXTINT == etype) { case EVENT_TYPE_EXTINT: return fred_extint(regs);
case EVENT_TYPE_NMI:
Good news -- the gcc works normal, generated right asm code, Bad news -- it fails back to binary search method to do matching, ignored our hints.
$objdump -d vmlinux.o | awk '/<fred_entry_from_kernel>:/{c=65} c&&c--' 00000000000016b0 <fred_entry_from_kernel>: 16b0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax 16b7: 48 8b 77 78 mov 0x78(%rdi),%rsi 16bb: 55 push %rbp 16bc: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi) 16c3: ff 16c4: 83 e0 0f and $0xf,%eax 16c7: 48 89 e5 mov %rsp,%rbp 16ca: 3c 03 cmp $0x3,%al 16cc: 74 3c je 170a <fred_entry_from_kernel+0x5a> 16ce: 76 13 jbe 16e3 <fred_entry_from_kernel+0x33> 16d0: 3c 05 cmp $0x5,%al 16d2: 74 41 je 1715 <fred_entry_from_kernel+0x65> 16d4: 3c 06 cmp $0x6,%al 16d6: 75 27 jne 16ff <fred_entry_from_kernel+0x4f> 16d8: e8 73 fe ff ff callq 1550 <fred_swexc> 16dd: 5d pop %rbp 16de: e9 00 00 00 00 jmpq 16e3 <fred_entry_from_kernel+0x33> 16e3: 84 c0 test %al,%al 16e5: 74 42 je 1729 <fred_entry_from_kernel+0x79> 16e7: 3c 02 cmp $0x2,%al 16e9: 75 14 jne 16ff <fred_entry_from_kernel+0x4f> 16eb: 80 bf a4 00 00 00 02 cmpb $0x2,0xa4(%rdi) 16f2: 75 0b jne 16ff <fred_entry_from_kernel+0x4f> 16f4: e8 00 00 00 00 callq 16f9 <fred_entry_from_kernel+0x49> 16f9: 5d pop %rbp 16fa: e9 00 00 00 00 jmpq 16ff <fred_entry_from_kernel+0x4f> 16ff: e8 1c fc ff ff callq 1320 <fred_bad_type> 1704: 5d pop %rbp 1705: e9 00 00 00 00 jmpq 170a <fred_entry_from_kernel+0x5a> 170a: e8 21 fd ff ff callq 1430 <fred_hwexc> 170f: 5d pop %rbp 1710: e9 00 00 00 00 jmpq 1715 <fred_entry_from_kernel+0x65> 1715: 80 bf a4 00 00 00 01 cmpb $0x1,0xa4(%rdi) 171c: 75 e1 jne 16ff <fred_entry_from_kernel+0x4f> 171e: e8 00 00 00 00 callq 1723 <fred_entry_from_kernel+0x73> 1723: 5d pop %rbp 1724: e9 00 00 00 00 jmpq 1729 <fred_entry_from_kernel+0x79> 1729: e8 12 fb ff ff callq 1240 <fred_extint> 172e: 5d pop %rbp 172f: e9 00 00 00 00 jmpq 1734 <fred_entry_from_kernel+0x84> 1734: 66 66 2e 0f 1f 84 00 data16 nopw %cs:0x0(%rax,%rax,1) 173b: 00 00 00 00 173f: 90 nop
$ objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--' 00000000000015a0 <fred_entry_from_user>: 15a0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax 15a7: 48 8b 77 78 mov 0x78(%rdi),%rsi 15ab: 55 push %rbp 15ac: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi) 15b3: ff 15b4: 83 e0 0f and $0xf,%eax 15b7: 48 89 e5 mov %rsp,%rbp 15ba: 3c 04 cmp $0x4,%al 15bc: 74 78 je 1636 <fred_entry_from_user+0x96> 15be: 77 15 ja 15d5 <fred_entry_from_user+0x35> 15c0: 3c 02 cmp $0x2,%al 15c2: 74 53 je 1617 <fred_entry_from_user+0x77> 15c4: 77 65 ja 162b <fred_entry_from_user+0x8b> 15c6: 84 c0 test %al,%al 15c8: 75 42 jne 160c <fred_entry_from_user+0x6c> 15ca: e8 71 fc ff ff callq 1240 <fred_extint> 15cf: 5d pop %rbp 15d0: e9 00 00 00 00 jmpq 15d5 <fred_entry_from_user+0x35> 15d5: 3c 06 cmp $0x6,%al 15d7: 74 7c je 1655 <fred_entry_from_user+0xb5> 15d9: 72 66 jb 1641 <fred_entry_from_user+0xa1> 15db: 3c 07 cmp $0x7,%al 15dd: 75 2d jne 160c <fred_entry_from_user+0x6c> 15df: 8b 87 a4 00 00 00 mov 0xa4(%rdi),%eax 15e5: 25 ff 00 00 02 and $0x20000ff,%eax 15ea: 3d 01 00 00 02 cmp $0x2000001,%eax 15ef: 75 6f jne 1660 <fred_entry_from_user+0xc0> 15f1: 48 8b 77 50 mov 0x50(%rdi),%rsi 15f5: 48 c7 47 50 da ff ff movq $0xffffffffffffffda,0x50(%rdi) 15fc: ff 15fd: 48 89 77 78 mov %rsi,0x78(%rdi) 1601: e8 00 00 00 00 callq 1606 <fred_entry_from_user+0x66> 1606: 5d pop %rbp 1607: e9 00 00 00 00 jmpq 160c <fred_entry_from_user+0x6c> 160c: e8 0f fd ff ff callq 1320 <fred_bad_type> 1611: 5d pop %rbp 1612: e9 00 00 00 00 jmpq 1617 <fred_entry_from_user+0x77> 1617: 80 bf a4 00 00 00 02 cmpb $0x2,0xa4(%rdi) 161e: 75 ec jne 160c <fred_entry_from_user+0x6c> 1620: e8 00 00 00 00 callq 1625 <fred_entry_from_user+0x85> 1625: 5d pop %rbp 1626: e9 00 00 00 00 jmpq 162b <fred_entry_from_user+0x8b> 162b: e8 00 fe ff ff callq 1430 <fred_hwexc> 1630: 5d pop %rbp 1631: e9 00 00 00 00 jmpq 1636 <fred_entry_from_user+0x96> 1636: e8 85 fc ff ff callq 12c0 <fred_intx> 163b: 5d pop %rbp 163c: e9 00 00 00 00 jmpq 1641 <fred_entry_from_user+0xa1> 1641: 80 bf a4 00 00 00 01 cmpb $0x1,0xa4(%rdi) 1648: 75 c2 jne 160c <fred_entry_from_user+0x6c> 164a: e8 00 00 00 00 callq 164f <fred_entry_from_user+0xaf> 164f: 5d pop %rbp 1650: e9 00 00 00 00 jmpq 1655 <fred_entry_from_user+0xb5> 1655: e8 f6 fe ff ff callq 1550 <fred_swexc> 165a: 5d pop %rbp 165b: e9 00 00 00 00 jmpq 1660 <fred_entry_from_user+0xc0> 1660: 83 f8 02 cmp $0x2,%eax 1663: 75 24 jne 1689 <fred_entry_from_user+0xe9> 1665: 80 3d 00 00 00 00 00 cmpb $0x0,0x0(%rip) # 166c <fred_entry_from_user+0xcc> 166c: 74 1b je 1689 <fred_entry_from_user+0xe9> 166e: 48 8b 47 50 mov 0x50(%rdi),%rax 1672: 48 c7 47 50 da ff ff movq $0xffffffffffffffda,0x50(%rdi) 1679: ff 167a: 48 89 47 78 mov %rax,0x78(%rdi)
In short, seems that __builtin_expect not work with switch(), at least for gcc version 8.5.0 20210514(RHEL).
Thanks, Ethan
Probably also check __builtin_expect on https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
case EVENT_TYPE_EXTINT: return fred_extint(regs); case EVENT_TYPE_NMI: @@ -256,11 +260,12 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs) __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs) { unsigned long error_code = regs->orig_ax; + unsigned short etype = regs->fred_ss.type & 0xf;
/* Invalidate orig_ax so that syscall_get_nr() works correctly */ regs->orig_ax = -1;
- switch (regs->fred_ss.type) { + switch_likely (etype == EVENT_TYPE_EXTINT, etype) { case EVENT_TYPE_EXTINT: return fred_extint(regs); case EVENT_TYPE_NMI:
Got the asm code as following:
objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--' 00000000000015a0 <fred_entry_from_user>: 15a0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax 15a7: 48 8b 77 78 mov 0x78(%rdi),%rsi 15ab: 55 push %rbp 15ac: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi) 15b3: ff 15b4: 48 89 e5 mov %rsp,%rbp 15b7: 66 83 e0 0f and $0xf,%ax 15bb: 74 11 je 15ce <fred_entry_from_user+0x2e> 15bd: 66 83 f8 07 cmp $0x7,%ax 15c1: 74 0b je 15ce <fred_entry_from_user+0x2e> 15c3: e8 78 fc ff ff callq 1240 <fred_extint> 15c8: 5d pop %rbp 15c9: e9 00 00 00 00 jmpq 15ce <fred_entry_from_user+0x2e> 15ce: e8 4d fd ff ff callq 1320 <fred_bad_type> 15d3: 5d pop %rbp 15d4: e9 00 00 00 00 jmpq 15d9 <fred_entry_from_user+0x39> 15d9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
00000000000015e0 <__pfx_fred_entry_from_kernel>: 15e0: 90 nop 15e1: 90 nop
00000000000015f0 <fred_entry_from_kernel>: 15f0: 55 push %rbp 15f1: 48 8b 77 78 mov 0x78(%rdi),%rsi 15f5: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi) 15fc: ff 15fd: 48 89 e5 mov %rsp,%rbp 1600: f6 87 a6 00 00 00 0f testb $0xf,0xa6(%rdi) 1607: 75 0b jne 1614 <fred_entry_from_kernel+0x24> 1609: e8 12 fd ff ff callq 1320 <fred_bad_type> 160e: 5d pop %rbp 160f: e9 00 00 00 00 jmpq 1614 <fred_entry_from_kernel+0x24> 1614: e8 27 fc ff ff callq 1240 <fred_extint> 1619: 5d pop %rbp 161a: e9 00 00 00 00 jmpq 161f <fred_entry_from_kernel+0x2f> 161f: 90 nop
0000000000001620 <__pfx___fred_entry_from_kvm>: 1620: 90 nop 1621: 90 nop
Even the fred_entry_from_kernel() asm code doesn't look right. *gcc version 8.5.0 20210514 (Red Hat 8.5.0-10) (GCC)* ** *Did I screw up something ?* ** *Thanks,* *Ethan*
-hpa