This is an automated email from the git hooks/post-receive script.
unknown user pushed a commit to branch hjl/iamcu/improve in repository gcc.
commit 0ef31c38ed76fdeac5906c35311a44fbe3df3819 Author: H.J. Lu hjl.tools@gmail.com Date: Tue Jul 21 14:32:09 2015 -0700
Add __builtin_stack_top
When __builtin_frame_address is used to retrieve the address of the function stack frame, the frame pointer register is required, which wastes one register and 2 instructions. For x86-32, one less register means significant negative impact on performance. This patch adds a new builtin function, __builtin_stack_top, to x86 backend. It returns the stack address when the function is called.
gcc/
PR target/66960 * config/i386/i386.c (ix86_expand_prologue): Sorry if DRAP is used and the stack address has been taken. (ix86_builtins): Add IX86_BUILTIN_STACK_TOP. (ix86_init_mmx_sse_builtins): Add __builtin_stack_top. (ix86_expand_builtin): Handle IX86_BUILTIN_STACK_TOP. * config/i386/i386.h (machine_function): Add stack_top_taken. * doc/extend.texi: Document __builtin_stack_top.
gcc/testsuite/
PR target/66960 * gcc.target/i386/pr66960-1.c: New test. * gcc.target/i386/pr66960-2.c: Likewise. * gcc.target/i386/pr66960-3.c: Likewise. * gcc.target/i386/pr66960-4.c: Likewise. * gcc.target/i386/pr66960-5.c: Likewise. --- gcc/config/i386/i386.c | 28 ++++++++++++++++++++++++++ gcc/config/i386/i386.h | 4 ++++ gcc/doc/extend.texi | 6 ++++++ gcc/testsuite/gcc.target/i386/pr66960-1.c | 33 +++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-2.c | 33 +++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-3.c | 17 ++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-4.c | 21 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-5.c | 21 ++++++++++++++++++++ 8 files changed, 163 insertions(+)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ede8ea0..945bbf8 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -11575,6 +11575,12 @@ ix86_expand_prologue (void) { int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ /* Can't use DRAP if the stack address has been taken. */ + if (cfun->machine->stack_top_taken) + sorry ("%<__builtin_stack_top%> not supported with stack" + " realignment. This may be worked around by adding" + " -maccumulate-outgoing-args."); + /* Only need to push parameter pointer reg if it is caller saved. */ if (!call_used_regs[REGNO (crtl->drap_reg)]) { @@ -30741,6 +30747,9 @@ enum ix86_builtins IX86_BUILTIN_READ_FLAGS, IX86_BUILTIN_WRITE_FLAGS,
+ /* Get the stack address when the function is called. */ + IX86_BUILTIN_STACK_TOP, + IX86_BUILTIN_MAX };
@@ -34353,6 +34362,10 @@ ix86_init_mmx_sse_builtins (void) def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx", VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
+ /* Get the stack address when the function is called. */ + def_builtin (0, "__builtin_stack_top", + PVOID_FTYPE_VOID, IX86_BUILTIN_STACK_TOP); + /* Add FMA4 multi-arg argument instructions */ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) { @@ -40291,6 +40304,21 @@ addcarryx: emit_insn (gen_xabort (op0)); return 0;
+ case IX86_BUILTIN_STACK_TOP: + cfun->machine->stack_top_taken = true; + + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + /* After the prologue, stack top is at -WORD(AP) in the current + frame. */ + emit_insn (gen_rtx_SET (target, + plus_constant (Pmode, arg_pointer_rtx, + -UNITS_PER_WORD))); + return target; + default: break; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7bd23ec..3781a22 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2492,6 +2492,10 @@ struct GTY(()) machine_function { /* If true, it is safe to not save/restore DRAP register. */ BOOL_BITFIELD no_drap_save_restore : 1;
+ /* If true, the stack address of the current function has been + taken. */ + BOOL_BITFIELD stack_top_taken : 1; + /* If true, there is register available for argument passing. This is used only in ix86_function_ok_for_sibcall by 32-bit to determine if there is scratch register available for indirect sibcall. In diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index b18d8fb..971b584 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -16661,6 +16661,12 @@ The following built-in function is always available. @item void __builtin_ia32_pause (void) Generates the @code{pause} machine instruction with a compiler memory barrier. + +@item void *__builtin_stack_top (void) +This function is similar to calling @code{__builtin_frame_address} +with a value of @code{0}, but it returns the stack address when the +function is called. Unlike @code{__builtin_frame_address}, the frame +pointer register isn't required. @end table
The following floating-point built-in functions are made available in the diff --git a/gcc/testsuite/gcc.target/i386/pr66960-1.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..aaab3cf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-1.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fomit-frame-pointer" { target { lp64 } } } */ +/* { dg-options "-O2 -fomit-frame-pointer -maddress-mode=short" { target { x32 } } } */ +/* { dg-options "-O2 -fomit-frame-pointer -miamcu" { target { ia32 } } } */ + +extern char **environ; +extern void exit (int status); +extern int main (long argc, char **argv, char **envp); + +void +_start (void) +{ + void *argc_p = __builtin_stack_top (); + char **argv = (char **) (argc_p + sizeof (void *)); + long argc = *(long *) argc_p; + int status; + + environ = argv + argc + 1; + + status = main (argc, argv, environ); + + exit (status); +} + +/* { dg-final { scan-assembler "movq[ \t]8\(%rsp\), %rdi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]16\(%rsp\), %rsi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]24\(%rsp,%rdi,8\), %rdx" { target lp6 [...] +/* { dg-final { scan-assembler "movl[ \t]8\(%esp\), %edi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]12\(%rsp\), %esi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%rsi,%rdi,4\), %edx" { target x32 [...] +/* { dg-final { scan-assembler "movl[ \t]\(%esp\), %eax" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%esp\), %edx" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]8\(%esp,%eax,4\), %ecx" { target ia32 [...] diff --git a/gcc/testsuite/gcc.target/i386/pr66960-2.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..b9dbde2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-2.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-omit-frame-pointer" { target { lp64 } } } */ +/* { dg-options "-O2 -fno-omit-frame-pointer -maddress-mode=short" { target { x32 [...] +/* { dg-options "-O2 -fno-omit-frame-pointer -miamcu" { target { ia32 } } } */ + +extern char **environ; +extern void exit (int status); +extern int main (long argc, char **argv, char **envp); + +void +_start (void) +{ + void *argc_p = __builtin_stack_top (); + char **argv = (char **) (argc_p + sizeof (void *)); + long argc = *(long *) argc_p; + int status; + + environ = argv + argc + 1; + + status = main (argc, argv, environ); + + exit (status); +} + +/* { dg-final { scan-assembler "movq[ \t]8\(%rbp\), %rdi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]16\(%rbp\), %rsi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]24\(%rbp,%rdi,8\), %rdx" { target lp6 [...] +/* { dg-final { scan-assembler "movl[ \t]8\(%ebp\), %edi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]12\(%rbp\), %esi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%rsi,%rdi,4\), %edx" { target x32 [...] +/* { dg-final { scan-assembler "movl[ \t]4\(%ebp\), %eax" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]8\(%ebp\), %edx" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]12\(%ebp,%eax,4\), %ecx" { target ia3 [...] diff --git a/gcc/testsuite/gcc.target/i386/pr66960-3.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..48cf25e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -mno-accumulate-outgoing-args" { target { lp64 } } } */ +/* { dg-options "-O2 -mno-accumulate-outgoing-args -maddress-mode=short" { target [...] +/* { dg-options "-O2 -mno-accumulate-outgoing-args -miamcu" { target { ia32 } } } */ + +extern void abort (void); +extern int check_int (int *i, int align); +typedef int aligned __attribute__((aligned(64))); + +void * +foo (void) +{ + aligned j; + if (check_int (&j, __alignof__(j)) != j) + abort (); + return __builtin_stack_top (); +} /* { dg-message "sorry, unimplemented: .__builtin_stack_top. not supported" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr66960-4.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..44c0b26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-4.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -maccumulate-outgoing-args" { target { lp64 } } } */ +/* { dg-options "-O2 -maccumulate-outgoing-args -maddress-mode=short" { target { x [...] +/* { dg-options "-O2 -maccumulate-outgoing-args -miamcu" { target { ia32 } } } */ + +extern void abort (void); +extern int check_int (int *i, int align); +typedef int aligned __attribute__((aligned(64))); + +void * +foo (void) +{ + aligned j; + if (check_int (&j, __alignof__(j)) != j) + abort (); + return __builtin_stack_top (); +} + +/* { dg-final { scan-assembler "leaq[ \t]8\(%rbp\), %rax" { target lp64 } } } */ +/* { dg-final { scan-assembler "leal[ \t]8\(%rbp\), %eax" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%ebp\), %eax" { target ia32 } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr66960-5.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..d449437 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-5.c @@ -0,0 +1,21 @@ +/* { dg-do link } */ +/* { dg-options "-O" } */ + +extern void link_error (void); + +__attribute__ ((noinline, noclone)) +void +foo (void) +{ + void **p = __builtin_stack_top (); + void *ra = __builtin_return_address (0); + if (*p != ra) + link_error (); +} + +int +main (void) +{ + foo (); + return 0; +}