This is an automated email from the git hooks/post-receive script.
unknown user pushed a commit to branch hjl/tune/cost-sched in repository gcc.
commit 150a5d47ac247587b6ff718f0621a680fd798a57 Author: H.J. Lu hjl.tools@gmail.com Date: Fri Jul 3 07:18:07 2015 -0700
Add -mcost= and -mschedule= options to x86 backend
This patch adds -mcost= and -mschedule= options to the x86 backend. They are used for tuning.
---
 gcc/config.gcc | 4 ++
 gcc/config/i386/README.cost | 8 +++
 gcc/config/i386/cost-generic.c | 90 +++++++++++++++++++++++++
 gcc/config/i386/cost-intel.c | 80 ++++++++++++++++++++++
 gcc/config/i386/cost.c | 86 ++++++++++++++++++++++++
 gcc/config/i386/cost.h | 122 +++++++++++++++++++++++++++++++++
 gcc/config/i386/i386-tune.c | 148 +++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/i386-tune.h | 19 ++++++
 gcc/config/i386/i386-tune.opt | 7 ++
 gcc/config/i386/i386.c | 6 +-
 gcc/config/i386/i386.h | 2 +
 11 files changed, 571 insertions(+), 1 deletion(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc index 900aa18..e40ef63 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -498,6 +498,10 @@ if test -f ${srcdir}/config/${cpu_type}/${cpu_type}.opt then extra_options="${extra_options} ${cpu_type}/${cpu_type}.opt" fi +if test -f ${srcdir}/config/${cpu_type}/${cpu_type}-tune.opt +then + extra_options="${extra_options} ${cpu_type}/${cpu_type}-tune.opt" +fi
case ${target} in aarch64*-*-*) diff --git a/gcc/config/i386/README.cost b/gcc/config/i386/README.cost new file mode 100644 index 0000000..433187c --- /dev/null +++ b/gcc/config/i386/README.cost @@ -0,0 +1,8 @@ +1. Modify cost.c to create a new cost model. +2. Compile cost.c into cost.so: + +# gcc -o cost.so -shared -fPIC -O2 -g cost.c + +3. Use -mcost=cost to load new cost numbers from cost.so with + +# export LD_LIBRARY_PATH=directory with cost.so diff --git a/gcc/config/i386/cost-generic.c b/gcc/config/i386/cost-generic.c new file mode 100644 index 0000000..e220a53 --- /dev/null +++ b/gcc/config/i386/cost-generic.c @@ -0,0 +1,90 @@ +#include "cost.h" + +const int ix86_cost_model_size = sizeof (struct processor_costs); + +/* Generic should produce code tuned for Core-i7 (and newer chips) + and btver1 (and newer chips). */ + +static stringop_algs generic_memcpy[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs generic_memset[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +const +struct processor_costs ix86_cost_model = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + /* On all chips taken into consideration lea is 2 cycles and more. With + this cost however our current implementation of synth_mult results in + use of unnecessary temporary registers causing regression on several + SPECfp benchmarks. 
*/ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + /* Benchmarks shows large regressions on K8 sixtrack benchmark when this + value is increased to perhaps more appropriate value of 5. */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. 
*/ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + generic_memcpy, + generic_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; diff --git a/gcc/config/i386/cost-intel.c b/gcc/config/i386/cost-intel.c new file mode 100644 index 0000000..d67ebc0 --- /dev/null +++ b/gcc/config/i386/cost-intel.c @@ -0,0 +1,80 @@ +#include "cost.h" + +const int ix86_cost_model_size = sizeof (struct processor_costs); + +static stringop_algs intel_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs intel_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +const +struct processor_costs ix86_cost_model = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (3), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* 
other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + intel_memcpy, + intel_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; diff --git a/gcc/config/i386/cost.c b/gcc/config/i386/cost.c new file mode 100644 index 0000000..2ed06bc --- /dev/null +++ b/gcc/config/i386/cost.c @@ -0,0 +1,86 @@ +#include "cost.h" + +const int ix86_cost_model_size = sizeof (struct processor_costs); + +/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes + (we ensure the alignment). For small blocks inline loop is still a + noticeable win, for bigger blocks either rep movsl or rep movsb is + way to go. Rep movsb has apparently more expensive startup time in CPU, + but after 4K the difference is down in the noise. */ +static stringop_algs pentiumpro_memcpy[2] = { + {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentiumpro_memset[2] = { + {rep_prefix_4_byte, {{1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +const +struct processor_costs ix86_cost_model = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (4)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (17), /* HI */ + COSTS_N_INSNS (17), /* SI */ + COSTS_N_INSNS (17), /* DI */ + COSTS_N_INSNS (17)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 6, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {2, 2, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 256, /* size of l2 cache */ + 32, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + COSTS_N_INSNS (56), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ + pentiumpro_memcpy, + pentiumpro_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; diff --git a/gcc/config/i386/cost.h b/gcc/config/i386/cost.h new file mode 100644 index 0000000..2ab86ad --- /dev/null +++ b/gcc/config/i386/cost.h @@ -0,0 +1,122 @@ +/* Return the right cost to give to an operation + to make the cost of the corresponding register-to-register instruction + N times that of a fast register-to-register instruction. 
*/ +#define COSTS_N_INSNS(N) ((N) * 4) + +#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}} + +#define false 0 +#define true 1 + +/* Algorithm to expand string function with. */ +enum stringop_alg +{ +#undef DEF_ENUM +#define DEF_ENUM + +#undef DEF_ALG +#define DEF_ALG(alg, name) alg, + +#include "stringop.def" +last_alg + +#undef DEF_ENUM +#undef DEF_ALG +}; + +#define MAX_STRINGOP_ALGS 4 + +/* Specify what algorithm to use for stringops on known size. + When size is unknown, the UNKNOWN_SIZE alg is used. When size is + known at compile time or estimated via feedback, the SIZE array + is walked in order until MAX is greater than the estimate (or -1 + means infinity). Corresponding ALG is used then. + When NOALIGN is true the code guaranteeing the alignment of the memory + block is skipped. + + For example initializer: + {{256, loop}, {-1, rep_prefix_4_byte}} + will use loop for blocks smaller than or equal to 256 bytes, rep prefix will + be used otherwise. */ +typedef struct stringop_algs +{ + const enum stringop_alg unknown_size; + const struct stringop_strategy { + const int max; + const enum stringop_alg alg; + int noalign; + } size [MAX_STRINGOP_ALGS]; +} stringop_algs; + +/* Define the specific costs for a given cpu */ + +struct processor_costs { + const int add; /* cost of an add instruction */ + const int lea; /* cost of a lea instruction */ + const int shift_var; /* variable shift costs */ + const int shift_const; /* constant shift costs */ + const int mult_init[5]; /* cost of starting a multiply + in QImode, HImode, SImode, DImode, TImode*/ + const int mult_bit; /* cost of multiply per each bit set */ + const int divide[5]; /* cost of a divide/mod + in QImode, HImode, SImode, DImode, TImode*/ + int movsx; /* The cost of movsx operation. */ + int movzx; /* The cost of movzx operation. */ + const int large_insn; /* insns larger than this cost more */ + const int move_ratio; /* The threshold of number of scalar + memory-to-memory move insns. 
*/ + const int movzbl_load; /* cost of loading using movzbl */ + const int int_load[3]; /* cost of loading integer registers + in QImode, HImode and SImode relative + to reg-reg move (2). */ + const int int_store[3]; /* cost of storing integer register + in QImode, HImode and SImode */ + const int fp_move; /* cost of reg,reg fld/fst */ + const int fp_load[3]; /* cost of loading FP register + in SFmode, DFmode and XFmode */ + const int fp_store[3]; /* cost of storing FP register + in SFmode, DFmode and XFmode */ + const int mmx_move; /* cost of moving MMX register. */ + const int mmx_load[2]; /* cost of loading MMX register + in SImode and DImode */ + const int mmx_store[2]; /* cost of storing MMX register + in SImode and DImode */ + const int sse_move; /* cost of moving SSE register. */ + const int sse_load[3]; /* cost of loading SSE register + in SImode, DImode and TImode*/ + const int sse_store[3]; /* cost of storing SSE register + in SImode, DImode and TImode*/ + const int mmxsse_to_integer; /* cost of moving mmxsse register to + integer and vice versa. */ + const int l1_cache_size; /* size of l1 cache, in kilobytes. */ + const int l2_cache_size; /* size of l2 cache, in kilobytes. */ + const int prefetch_block; /* bytes moved to cache for prefetch. */ + const int simultaneous_prefetches; /* number of parallel prefetch + operations. */ + const int branch_cost; /* Default value for BRANCH_COST. */ + const int fadd; /* cost of FADD and FSUB instructions. */ + const int fmul; /* cost of FMUL instruction. */ + const int fdiv; /* cost of FDIV instruction. */ + const int fabs; /* cost of FABS instruction. */ + const int fchs; /* cost of FCHS instruction. */ + const int fsqrt; /* cost of FSQRT instruction. */ + /* Specify what algorithm + to use for stringops on unknown size. */ + struct stringop_algs *memcpy, *memset; + const int scalar_stmt_cost; /* Cost of any scalar operation, excluding + load and store. */ + const int scalar_load_cost; /* Cost of scalar load. 
*/ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, excluding + load, store, vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vect-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer + cost model. */ + const int cond_not_taken_branch_cost;/* Cost of not taken branch for + vectorizer cost model. */ +}; diff --git a/gcc/config/i386/i386-tune.c b/gcc/config/i386/i386-tune.c new file mode 100644 index 0000000..9cf702c --- /dev/null +++ b/gcc/config/i386/i386-tune.c @@ -0,0 +1,148 @@ +#include <dlfcn.h> + +#define COST_MODEL "ix86_cost_model" +#define COST_MODEL_SIZE COST_MODEL "_size" + +static const struct processor_costs * +load_cost_model (const char *model) +{ + const struct processor_costs *cost; + int *cost_size; + void *handle; + char *dso = (char *) alloca (strlen (model) + sizeof ".so"); + + strcpy (stpcpy (dso, model), ".so"); + + handle = dlopen (dso, RTLD_LAZY); + if (handle == NULL) + { + fprintf(stderr, "load_cost_model: dlopen: %s\n", dlerror ()); + return NULL; + } + + cost = (const struct processor_costs *) dlsym (handle, COST_MODEL); + if (cost == NULL) + { + fprintf(stderr, "load_cost_model: dlsym: %s\n", dlerror ()); + goto error_return; + } + + cost_size = (int *) dlsym (handle, COST_MODEL_SIZE); + if (cost == NULL) + { + fprintf(stderr, "load_cost_model: dlsym: %s\n", dlerror ()); + goto error_return; + } + + if (*cost_size != sizeof (*cost)) + { + fprintf(stderr, "load_cost_model: cost model size %d != %d\n", + *cost_size, (int) sizeof (*cost)); + cost = NULL; + } + +error_return: + if (cost == NULL) + dlclose (handle); 
+ + return cost; +} + +void +x86_tune_options (void) +{ + if (x86_cost_string) + { + if (strcmp (x86_cost_string, "size") == 0) + ix86_cost = &ix86_size_cost; + else if (strcmp (x86_cost_string, "i386") == 0) + ix86_cost = &i386_cost; + else if (strcmp (x86_cost_string, "i486") == 0) + ix86_cost = &i486_cost; + else if (strcmp (x86_cost_string, "pentium") == 0) + ix86_cost = &pentium_cost; + else if (strcmp (x86_cost_string, "pentiumpro") == 0) + ix86_cost = &pentiumpro_cost; + else if (strcmp (x86_cost_string, "geode") == 0) + ix86_cost = &geode_cost; + else if (strcmp (x86_cost_string, "k6") == 0) + ix86_cost = &k6_cost; + else if (strcmp (x86_cost_string, "athlon") == 0) + ix86_cost = &athlon_cost; + else if (strcmp (x86_cost_string, "k8") == 0) + ix86_cost = &k8_cost; + else if (strcmp (x86_cost_string, "amdfam10") == 0) + ix86_cost = &amdfam10_cost; + else if (strcmp (x86_cost_string, "bdver1") == 0) + ix86_cost = &bdver1_cost; + else if (strcmp (x86_cost_string, "bdver2") == 0) + ix86_cost = &bdver2_cost; + else if (strcmp (x86_cost_string, "bdver3") == 0) + ix86_cost = &bdver3_cost; + else if (strcmp (x86_cost_string, "bdver4") == 0) + ix86_cost = &bdver4_cost; + else if (strcmp (x86_cost_string, "btver1") == 0) + ix86_cost = &btver1_cost; + else if (strcmp (x86_cost_string, "btver2") == 0) + ix86_cost = &btver2_cost; + else if (strcmp (x86_cost_string, "pentium4") == 0) + ix86_cost = &pentium4_cost; + else if (strcmp (x86_cost_string, "nocona") == 0) + ix86_cost = &nocona_cost; + else if (strcmp (x86_cost_string, "atom") == 0) + ix86_cost = &atom_cost; + else if (strcmp (x86_cost_string, "slm") == 0) + ix86_cost = &slm_cost; + else if (strcmp (x86_cost_string, "generic") == 0) + ix86_cost = &generic_cost; + else if (strcmp (x86_cost_string, "core") == 0) + ix86_cost = &core_cost; + else + { + ix86_cost = load_cost_model (x86_cost_string); + if (ix86_cost == NULL) + abort (); + } + } + if (x86_schedule_string) + { + if (strcmp (x86_schedule_string, "none") 
== 0) + ix86_schedule = CPU_NONE; + else if (strcmp (x86_schedule_string, "pentium") == 0) + ix86_schedule = CPU_PENTIUM; + else if (strcmp (x86_schedule_string, "pentiumpro") == 0) + ix86_schedule = CPU_PENTIUMPRO; + else if (strcmp (x86_schedule_string, "geode") == 0) + ix86_schedule = CPU_GEODE; + else if (strcmp (x86_schedule_string, "k6") == 0) + ix86_schedule = CPU_K6; + else if (strcmp (x86_schedule_string, "athlon") == 0) + ix86_schedule = CPU_ATHLON; + else if (strcmp (x86_schedule_string, "k8") == 0) + ix86_schedule = CPU_K8; + else if (strcmp (x86_schedule_string, "core2") == 0) + ix86_schedule = CPU_CORE2; + else if (strcmp (x86_schedule_string, "corei7") == 0) + ix86_schedule = CPU_NEHALEM; + else if (strcmp (x86_schedule_string, "atom") == 0) + ix86_schedule = CPU_ATOM; + else if (strcmp (x86_schedule_string, "slm") == 0) + ix86_schedule = CPU_SLM; + else if (strcmp (x86_schedule_string, "generic") == 0) + ix86_schedule = CPU_GENERIC; + else if (strcmp (x86_schedule_string, "amdfam10") == 0) + ix86_schedule = CPU_AMDFAM10; + else if (strcmp (x86_schedule_string, "bdver1") == 0) + ix86_schedule = CPU_BDVER1; + else if (strcmp (x86_schedule_string, "bdver2") == 0) + ix86_schedule = CPU_BDVER2; + else if (strcmp (x86_schedule_string, "bdver3") == 0) + ix86_schedule = CPU_BDVER3; + else if (strcmp (x86_schedule_string, "bdver4") == 0) + ix86_schedule = CPU_BDVER4; + else if (strcmp (x86_schedule_string, "btver2") == 0) + ix86_schedule = CPU_BTVER2; + else + abort (); + } +} diff --git a/gcc/config/i386/i386-tune.h b/gcc/config/i386/i386-tune.h new file mode 100644 index 0000000..50ab949 --- /dev/null +++ b/gcc/config/i386/i386-tune.h @@ -0,0 +1,19 @@ +/* Tuning options: + + -mcost=[size|i386|i486|pentium|pentiumpro|geode|k6|athlon|k8|amdfam10|bdver1|bdver2|bdver3|bdver4|btver1|btver2|pentium4|nocona|atom|slm|generic|core] + + -missue-rate=NUMBER + + -madjust-cost=NUMBER + + -mmultipass-dfa-lookahead=NUMBER + + */ + +extern void x86_tune_options (void); + 
+extern int ix86_issue_rate_value; + +extern int ix86_adjust_cost_value; + +extern int ia32_multipass_dfa_lookahead_value; diff --git a/gcc/config/i386/i386-tune.opt b/gcc/config/i386/i386-tune.opt new file mode 100644 index 0000000..d1bcc88 --- /dev/null +++ b/gcc/config/i386/i386-tune.opt @@ -0,0 +1,7 @@ +; Tuning options for i386. + +mcost= +Target RejectNegative Joined Report Var(x86_cost_string) Undocumented + +mschedule= +Target RejectNegative Joined Report Var(x86_schedule_string) Undocumented diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 54ee6f3..3eae830 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3917,7 +3917,7 @@ ix86_option_override_internal (bool main_args_p, ix86_tune = processor_alias_table[i].processor; if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) { - if (!(processor_alias_table[i].flags & PTA_64BIT)) + if (0 && !(processor_alias_table[i].flags & PTA_64BIT)) { if (ix86_tune_defaulted) { @@ -4003,6 +4003,8 @@ ix86_option_override_internal (bool main_args_p, else ix86_cost = ix86_tune_cost;
+ x86_tune_options (); + /* Arrange to set up i386_stack_locals for all functions. */ init_machine_status = ix86_init_machine_status;
@@ -52679,6 +52681,8 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load, #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
+#include "config/i386/i386-tune.c" + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 74334ff..8fc5606 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2577,3 +2577,5 @@ Local variables: version-control: t End: */ + +#include "config/i386/i386-tune.h"