[gcc] 01/02: Add -mcost= and -mschedule= options to x86 backend - Tcwg-commits

9 Jul 2015

This is an automated email from the git hooks/post-receive script.
unknown user pushed a commit to branch hjl/tune/cost-sched
in repository gcc.
commit 150a5d47ac247587b6ff718f0621a680fd798a57
Author: H.J. Lu hjl.tools@gmail.com
Date:   Fri Jul 3 07:18:07 2015 -0700
Add -mcost= and -mschedule= options to x86 backend
This patch adds -mcost= and -mschedule= options to x86 backend.  They
    are used for tuning.
---
 gcc/config.gcc                 |   4 ++
 gcc/config/i386/README.cost    |   8 +++
 gcc/config/i386/cost-generic.c |  90 +++++++++++++++++++++++++
 gcc/config/i386/cost-intel.c   |  80 ++++++++++++++++++++++
 gcc/config/i386/cost.c         |  86 ++++++++++++++++++++++++
 gcc/config/i386/cost.h         | 122 +++++++++++++++++++++++++++++++++
 gcc/config/i386/i386-tune.c    | 148 +++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/i386-tune.h    |  19 ++++++
 gcc/config/i386/i386-tune.opt  |   7 ++
 gcc/config/i386/i386.c         |   6 +-
 gcc/config/i386/i386.h         |   2 +
 11 files changed, 571 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 900aa18..e40ef63 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -498,6 +498,10 @@ if test -f ${srcdir}/config/${cpu_type}/${cpu_type}.opt
 then
    extra_options="${extra_options} ${cpu_type}/${cpu_type}.opt"
 fi
+if test -f ${srcdir}/config/${cpu_type}/${cpu_type}-tune.opt
+then
+	extra_options="${extra_options} ${cpu_type}/${cpu_type}-tune.opt"
+fi
case ${target} in
 aarch64*-*-*)
diff --git a/gcc/config/i386/README.cost b/gcc/config/i386/README.cost
new file mode 100644
index 0000000..433187c
--- /dev/null
+++ b/gcc/config/i386/README.cost
@@ -0,0 +1,8 @@
+1. Modify cost.c to create a new cost model.
+2. Compile cost.c into cost.so:
+
+# gcc -o cost.so -shared -fPIC -O2 -g cost.c
+
+3. Use -mcost=cost to load the new cost numbers from cost.so.  The
+   directory containing cost.so must be on the dynamic linker's search
+   path, e.g.:
+
+# export LD_LIBRARY_PATH=<directory containing cost.so>
diff --git a/gcc/config/i386/cost-generic.c b/gcc/config/i386/cost-generic.c
new file mode 100644
index 0000000..e220a53
--- /dev/null
+++ b/gcc/config/i386/cost-generic.c
@@ -0,0 +1,90 @@
+#include "cost.h"
+
+const int ix86_cost_model_size = sizeof (struct processor_costs);
+
+/* Generic should produce code tuned for Core-i7 (and newer chips)
+   and btver1 (and newer chips).  */
+
+static stringop_algs generic_memcpy[2] = {
+  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
+             {-1, libcall, false}}},
+  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
+             {-1, libcall, false}}}};
+static stringop_algs generic_memset[2] = {
+  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
+             {-1, libcall, false}}},
+  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
+             {-1, libcall, false}}}};
+/* Cost table exported by this file.  When -mcost= names a cost model
+   that is not built in, the compiler loads the shared object and looks
+   this table up under the name "ix86_cost_model" with dlsym.  The
+   initializer is positional: entries must stay in the order of the
+   fields of struct processor_costs in cost.h.  */
+const
+struct processor_costs ix86_cost_model = {
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  /* On all chips taken into consideration lea is 2 cycles and more.  With
+     this cost however our current implementation of synth_mult results in
+     use of unnecessary temporary registers causing regression on several
+     SPECfp benchmarks.  */
+  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),			/*				 HI */
+   COSTS_N_INSNS (3),			/*				 SI */
+   COSTS_N_INSNS (4),			/*				 DI */
+   COSTS_N_INSNS (2)},			/*			      other */
+  0,					/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (26),			/*			    HI */
+   COSTS_N_INSNS (42),			/*			    SI */
+   COSTS_N_INSNS (74),			/*			    DI */
+   COSTS_N_INSNS (74)},			/*			    other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
+  8,					/* "large" insn */
+  17,					/* MOVE_RATIO */
+  4,				     /* cost for loading QImode using movzbl */
+  {4, 4, 4},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {4, 4, 4},				/* cost of storing integer registers */
+  4,					/* cost of reg,reg fld/fst */
+  {12, 12, 12},				/* cost of loading fp registers
+					   in SFmode, DFmode and XFmode */
+  {6, 6, 8},				/* cost of storing fp registers
+					   in SFmode, DFmode and XFmode */
+  2,					/* cost of moving MMX register */
+  {8, 8},				/* cost of loading MMX registers
+					   in SImode and DImode */
+  {8, 8},				/* cost of storing MMX registers
+					   in SImode and DImode */
+  2,					/* cost of moving SSE register */
+  {8, 8, 8},				/* cost of loading SSE registers
+					   in SImode, DImode and TImode */
+  {8, 8, 8},				/* cost of storing SSE registers
+					   in SImode, DImode and TImode */
+  5,					/* MMX or SSE register to integer */
+  32,					/* size of l1 cache.  */
+  512,					/* size of l2 cache.  */
+  64,					/* size of prefetch block */
+  6,					/* number of parallel prefetches */
+  /* Benchmarks show large regressions on K8 sixtrack benchmark when this
+     value is increased to perhaps more appropriate value of 5.  */
+  3,					/* Branch cost */
+  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
+  generic_memcpy,
+  generic_memset,
+  1,					/* scalar_stmt_cost.  */
+  1,					/* scalar_load_cost.  */
+  1,					/* scalar_store_cost.  */
+  1,					/* vec_stmt_cost.  */
+  1,					/* vec_to_scalar_cost.  */
+  1,					/* scalar_to_vec_cost.  */
+  1,					/* vec_align_load_cost.  */
+  2,					/* vec_unalign_load_cost.  */
+  1,					/* vec_store_cost.  */
+  3,					/* cond_taken_branch_cost.  */
+  1,					/* cond_not_taken_branch_cost.  */
+};
diff --git a/gcc/config/i386/cost-intel.c b/gcc/config/i386/cost-intel.c
new file mode 100644
index 0000000..d67ebc0
--- /dev/null
+++ b/gcc/config/i386/cost-intel.c
@@ -0,0 +1,80 @@
+#include "cost.h"
+
+const int ix86_cost_model_size = sizeof (struct processor_costs);
+
+static stringop_algs intel_memcpy[2] = {
+  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
+  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
+             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+static stringop_algs intel_memset[2] = {
+  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
+             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
+             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+/* Cost table exported by this file.  When -mcost= names a cost model
+   that is not built in, the compiler loads the shared object and looks
+   this table up under the name "ix86_cost_model" with dlsym.  The
+   initializer is positional: entries must stay in the order of the
+   fields of struct processor_costs in cost.h.  */
+const
+struct processor_costs ix86_cost_model = {
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (3),			/*				 HI */
+   COSTS_N_INSNS (3),			/*				 SI */
+   COSTS_N_INSNS (4),			/*				 DI */
+   COSTS_N_INSNS (2)},			/*			      other */
+  0,					/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (26),			/*			    HI */
+   COSTS_N_INSNS (42),			/*			    SI */
+   COSTS_N_INSNS (74),			/*			    DI */
+   COSTS_N_INSNS (74)},			/*			    other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
+  8,					/* "large" insn */
+  17,					/* MOVE_RATIO */
+  4,					/* cost for loading QImode using movzbl */
+  {4, 4, 4},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {4, 4, 4},				/* cost of storing integer registers */
+  4,					/* cost of reg,reg fld/fst */
+  {12, 12, 12},				/* cost of loading fp registers
+					   in SFmode, DFmode and XFmode */
+  {6, 6, 8},				/* cost of storing fp registers
+					   in SFmode, DFmode and XFmode */
+  2,					/* cost of moving MMX register */
+  {8, 8},				/* cost of loading MMX registers
+					   in SImode and DImode */
+  {8, 8},				/* cost of storing MMX registers
+					   in SImode and DImode */
+  2,					/* cost of moving SSE register */
+  {8, 8, 8},				/* cost of loading SSE registers
+					   in SImode, DImode and TImode */
+  {8, 8, 8},				/* cost of storing SSE registers
+					   in SImode, DImode and TImode */
+  5,					/* MMX or SSE register to integer */
+  32,					/* size of l1 cache.  */
+  256,					/* size of l2 cache.  */
+  64,					/* size of prefetch block */
+  6,					/* number of parallel prefetches */
+  3,					/* Branch cost */
+  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
+  intel_memcpy,
+  intel_memset,
+  1,					/* scalar_stmt_cost.  */
+  1,					/* scalar_load_cost.  */
+  1,					/* scalar_store_cost.  */
+  1,					/* vec_stmt_cost.  */
+  1,					/* vec_to_scalar_cost.  */
+  1,					/* scalar_to_vec_cost.  */
+  1,					/* vec_align_load_cost.  */
+  2,					/* vec_unalign_load_cost.  */
+  1,					/* vec_store_cost.  */
+  3,					/* cond_taken_branch_cost.  */
+  1,					/* cond_not_taken_branch_cost.  */
+};
diff --git a/gcc/config/i386/cost.c b/gcc/config/i386/cost.c
new file mode 100644
index 0000000..2ed06bc
--- /dev/null
+++ b/gcc/config/i386/cost.c
@@ -0,0 +1,86 @@
+#include "cost.h"
+
+const int ix86_cost_model_size = sizeof (struct processor_costs);
+
+/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
+   (we ensure the alignment).  For small blocks inline loop is still a
+   noticeable win, for bigger blocks either rep movsl or rep movsb is
+   way to go.  Rep movsb has apparently more expensive startup time in CPU,
+   but after 4K the difference is down in the noise.  */
+static stringop_algs pentiumpro_memcpy[2] = {
+  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
+                       {8192, rep_prefix_4_byte, false},
+                       {-1, rep_prefix_1_byte, false}}},
+  DUMMY_STRINGOP_ALGS};
+static stringop_algs pentiumpro_memset[2] = {
+  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
+                       {8192, rep_prefix_4_byte, false},
+                       {-1, libcall, false}}},
+  DUMMY_STRINGOP_ALGS};
+/* Cost table exported by this file; README.cost describes editing this
+   file to create a new cost model.  When -mcost= names a cost model that
+   is not built in, the compiler loads the shared object and looks this
+   table up under the name "ix86_cost_model" with dlsym.  The initializer
+   is positional: entries must stay in the order of the fields of
+   struct processor_costs in cost.h.  */
+const
+struct processor_costs ix86_cost_model = {
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1),			/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),			/*				 HI */
+   COSTS_N_INSNS (4),			/*				 SI */
+   COSTS_N_INSNS (4),			/*				 DI */
+   COSTS_N_INSNS (4)},			/*			      other */
+  0,					/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (17),			/*			    HI */
+   COSTS_N_INSNS (17),			/*			    SI */
+   COSTS_N_INSNS (17),			/*			    DI */
+   COSTS_N_INSNS (17)},			/*			    other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
+  8,					/* "large" insn */
+  6,					/* MOVE_RATIO */
+  2,				     /* cost for loading QImode using movzbl */
+  {4, 4, 4},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {2, 2, 2},				/* cost of storing integer registers */
+  2,					/* cost of reg,reg fld/fst */
+  {2, 2, 6},				/* cost of loading fp registers
+					   in SFmode, DFmode and XFmode */
+  {4, 4, 6},				/* cost of storing fp registers
+					   in SFmode, DFmode and XFmode */
+  2,					/* cost of moving MMX register */
+  {2, 2},				/* cost of loading MMX registers
+					   in SImode and DImode */
+  {2, 2},				/* cost of storing MMX registers
+					   in SImode and DImode */
+  2,					/* cost of moving SSE register */
+  {2, 2, 8},				/* cost of loading SSE registers
+					   in SImode, DImode and TImode */
+  {2, 2, 8},				/* cost of storing SSE registers
+					   in SImode, DImode and TImode */
+  3,					/* MMX or SSE register to integer */
+  8,					/* size of l1 cache.  */
+  256,					/* size of l2 cache  */
+  32,					/* size of prefetch block */
+  6,					/* number of parallel prefetches */
+  2,					/* Branch cost */
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
+  pentiumpro_memcpy,
+  pentiumpro_memset,
+  1,					/* scalar_stmt_cost.  */
+  1,					/* scalar_load_cost.  */
+  1,					/* scalar_store_cost.  */
+  1,					/* vec_stmt_cost.  */
+  1,					/* vec_to_scalar_cost.  */
+  1,					/* scalar_to_vec_cost.  */
+  1,					/* vec_align_load_cost.  */
+  2,					/* vec_unalign_load_cost.  */
+  1,					/* vec_store_cost.  */
+  3,					/* cond_taken_branch_cost.  */
+  1,					/* cond_not_taken_branch_cost.  */
+};
diff --git a/gcc/config/i386/cost.h b/gcc/config/i386/cost.h
new file mode 100644
index 0000000..2ab86ad
--- /dev/null
+++ b/gcc/config/i386/cost.h
@@ -0,0 +1,122 @@
+/* Return the right cost to give to an operation
+   to make the cost of the corresponding register-to-register instruction
+   N times that of a fast register-to-register instruction.  */
+#define COSTS_N_INSNS(N) ((N) * 4)
+
+#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
+
+#define false 0
+#define true 1
+
+/* Algorithm to expand string function with.  */
+enum stringop_alg
+{
+#undef DEF_ENUM
+#define DEF_ENUM
+
+#undef DEF_ALG
+#define DEF_ALG(alg, name) alg, 
+
+#include "stringop.def"
+last_alg
+
+#undef DEF_ENUM
+#undef DEF_ALG
+};
+
+#define MAX_STRINGOP_ALGS 4
+
+/* Specify what algorithm to use for stringops on known size.
+   When size is unknown, the UNKNOWN_SIZE alg is used.  When size is
+   known at compile time or estimated via feedback, the SIZE array
+   is walked in order until MAX is greater than the estimate (or -1
+   means infinity).  Corresponding ALG is used then.
+   When NOALIGN is true the code guaranteeing the alignment of the memory
+   block is skipped.
+
+   For example initializer:
+    {{256, loop}, {-1, rep_prefix_4_byte}}
+   will use loop for blocks smaller or equal to 256 bytes, rep prefix will
+   be used otherwise.  (NOALIGN is omitted in this example and is
+   therefore zero-initialized, i.e. false.)
+
+   The SIZE array holds at most MAX_STRINGOP_ALGS entries.  */
+typedef struct stringop_algs
+{
+  /* Algorithm used when the block size is not known.  */
+  const enum stringop_alg unknown_size;
+  const struct stringop_strategy {
+    /* Upper size bound for this entry; -1 means no bound.  */
+    const int max;
+    /* Algorithm selected by this entry.  */
+    const enum stringop_alg alg;
+    /* Nonzero to skip the code that aligns the memory block.  */
+    int noalign;
+  } size [MAX_STRINGOP_ALGS];
+} stringop_algs;
+
+/* Define the specific costs for a given cpu */
+
+struct processor_costs {
+  const int add;		/* cost of an add instruction */
+  const int lea;		/* cost of a lea instruction */
+  const int shift_var;		/* variable shift costs */
+  const int shift_const;	/* constant shift costs */
+  const int mult_init[5];	/* cost of starting a multiply
+				   in QImode, HImode, SImode, DImode, TImode*/
+  const int mult_bit;		/* cost of multiply per each bit set */
+  const int divide[5];		/* cost of a divide/mod
+				   in QImode, HImode, SImode, DImode, TImode*/
+  int movsx;			/* The cost of movsx operation.  */
+  int movzx;			/* The cost of movzx operation.  */
+  const int large_insn;		/* insns larger than this cost more */
+  const int move_ratio;		/* The threshold of number of scalar
+				   memory-to-memory move insns.  */
+  const int movzbl_load;	/* cost of loading using movzbl */
+  const int int_load[3];	/* cost of loading integer registers
+				   in QImode, HImode and SImode relative
+				   to reg-reg move (2).  */
+  const int int_store[3];	/* cost of storing integer register
+				   in QImode, HImode and SImode */
+  const int fp_move;		/* cost of reg,reg fld/fst */
+  const int fp_load[3];		/* cost of loading FP register
+				   in SFmode, DFmode and XFmode */
+  const int fp_store[3];	/* cost of storing FP register
+				   in SFmode, DFmode and XFmode */
+  const int mmx_move;		/* cost of moving MMX register.  */
+  const int mmx_load[2];	/* cost of loading MMX register
+				   in SImode and DImode */
+  const int mmx_store[2];	/* cost of storing MMX register
+				   in SImode and DImode */
+  const int sse_move;		/* cost of moving SSE register.  */
+  const int sse_load[3];	/* cost of loading SSE register
+				   in SImode, DImode and TImode*/
+  const int sse_store[3];	/* cost of storing SSE register
+				   in SImode, DImode and TImode*/
+  const int mmxsse_to_integer;	/* cost of moving mmxsse register to
+				   integer and vice versa.  */
+  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
+  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
+  const int prefetch_block;	/* bytes moved to cache for prefetch.  */
+  const int simultaneous_prefetches; /* number of parallel prefetch
+				   operations.  */
+  const int branch_cost;	/* Default value for BRANCH_COST.  */
+  const int fadd;		/* cost of FADD and FSUB instructions.  */
+  const int fmul;		/* cost of FMUL instruction.  */
+  const int fdiv;		/* cost of FDIV instruction.  */
+  const int fabs;		/* cost of FABS instruction.  */
+  const int fchs;		/* cost of FCHS instruction.  */
+  const int fsqrt;		/* cost of FSQRT instruction.  */
+				/* Specify what algorithm
+				   to use for stringops on unknown size.  */
+  struct stringop_algs *memcpy, *memset;
+  const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
+				   load and store.  */
+  const int scalar_load_cost;   /* Cost of scalar load.  */
+  const int scalar_store_cost;  /* Cost of scalar store.  */
+  const int vec_stmt_cost;      /* Cost of any vector operation, excluding
+                                   load, store, vector-to-scalar and
+                                   scalar-to-vector operation.  */
+  const int vec_to_scalar_cost;    /* Cost of vect-to-scalar operation.  */
+  const int scalar_to_vec_cost;    /* Cost of scalar-to-vector operation.  */
+  const int vec_align_load_cost;   /* Cost of aligned vector load.  */
+  const int vec_unalign_load_cost; /* Cost of unaligned vector load.  */
+  const int vec_store_cost;        /* Cost of vector store.  */
+  const int cond_taken_branch_cost;    /* Cost of taken branch for vectorizer
+					  cost model.  */
+  const int cond_not_taken_branch_cost;/* Cost of not taken branch for
+					  vectorizer cost model.  */
+};
diff --git a/gcc/config/i386/i386-tune.c b/gcc/config/i386/i386-tune.c
new file mode 100644
index 0000000..9cf702c
--- /dev/null
+++ b/gcc/config/i386/i386-tune.c
@@ -0,0 +1,148 @@
+#include <dlfcn.h>
+
+#define COST_MODEL	"ix86_cost_model"
+#define COST_MODEL_SIZE	COST_MODEL "_size"
+
+/* Load a cost model from the shared object MODEL.so.
+
+   The object is opened with dlopen, so it is found through the dynamic
+   linker's search path (e.g. LD_LIBRARY_PATH).  It must define
+   COST_MODEL, the struct processor_costs to use, and COST_MODEL_SIZE,
+   an int holding the size of that structure as seen when the object was
+   compiled.  The size is compared with this compiler's own
+   sizeof (struct processor_costs) to reject a table built against a
+   different structure layout.
+
+   Return the cost table on success; the shared object is deliberately
+   left loaded because the returned table lives inside it.  On any
+   failure print a message to stderr, unload the object if it was
+   opened, and return NULL.  */
+
+static const struct processor_costs *
+load_cost_model (const char *model)
+{
+  const struct processor_costs *cost;
+  const int *cost_size;
+  void *handle;
+  /* sizeof ".so" counts the terminating NUL, so DSO is exactly large
+     enough for MODEL followed by ".so".  */
+  char *dso = (char *) alloca (strlen (model) + sizeof ".so");
+
+  strcpy (stpcpy (dso, model), ".so");
+
+  handle = dlopen (dso, RTLD_LAZY);
+  if (handle == NULL)
+    {
+      fprintf (stderr, "load_cost_model: dlopen: %s\n", dlerror ());
+      return NULL;
+    }
+
+  cost = (const struct processor_costs *) dlsym (handle, COST_MODEL);
+  if (cost == NULL)
+    {
+      fprintf (stderr, "load_cost_model: dlsym: %s\n", dlerror ());
+      goto error_return;
+    }
+
+  /* Validate the size symbol itself.  Testing COST a second time here
+     would let a shared object without COST_MODEL_SIZE through and
+     dereference a null pointer in the comparison below.  */
+  cost_size = (const int *) dlsym (handle, COST_MODEL_SIZE);
+  if (cost_size == NULL)
+    {
+      fprintf (stderr, "load_cost_model: dlsym: %s\n", dlerror ());
+      goto error_return;
+    }
+
+  if (*cost_size != (int) sizeof (*cost))
+    {
+      fprintf (stderr, "load_cost_model: cost model size %d != %d\n",
+	       *cost_size, (int) sizeof (*cost));
+      goto error_return;
+    }
+
+  return cost;
+
+error_return:
+  /* Every failure after a successful dlopen ends up here, so the handle
+     is never leaked and an unvalidated table is never returned.  */
+  dlclose (handle);
+  return NULL;
+}
+
+/* Apply the -mcost= and -mschedule= tuning overrides.
+
+   If x86_cost_string is set, point ix86_cost at the cost table with
+   that name.  A name that matches none of the tables listed here is
+   passed to load_cost_model, and the compiler aborts if that fails.
+   If x86_schedule_string is set, set ix86_schedule to the matching
+   CPU_* value; an unrecognized name aborts.  An option that was not
+   given leaves the corresponding variable untouched.
+
+   NOTE(review): both failure paths call abort () on a bad user-supplied
+   option value; consider reporting an ordinary diagnostic instead.  */
+
+void
+x86_tune_options (void)
+{
+  /* -mcost=: select the cost table by name.  */
+  if (x86_cost_string)
+    {
+      if (strcmp (x86_cost_string, "size") == 0)
+	ix86_cost = &ix86_size_cost;
+      else if (strcmp (x86_cost_string, "i386") == 0)
+	ix86_cost = &i386_cost;
+      else if (strcmp (x86_cost_string, "i486") == 0)
+	ix86_cost = &i486_cost;
+      else if (strcmp (x86_cost_string, "pentium") == 0)
+	ix86_cost = &pentium_cost;
+      else if (strcmp (x86_cost_string, "pentiumpro") == 0)
+	ix86_cost = &pentiumpro_cost;
+      else if (strcmp (x86_cost_string, "geode") == 0)
+	ix86_cost = &geode_cost;
+      else if (strcmp (x86_cost_string, "k6") == 0)
+	ix86_cost = &k6_cost;
+      else if (strcmp (x86_cost_string, "athlon") == 0)
+	ix86_cost = &athlon_cost;
+      else if (strcmp (x86_cost_string, "k8") == 0)
+	ix86_cost = &k8_cost;
+      else if (strcmp (x86_cost_string, "amdfam10") == 0)
+	ix86_cost = &amdfam10_cost;
+      else if (strcmp (x86_cost_string, "bdver1") == 0)
+	ix86_cost = &bdver1_cost;
+      else if (strcmp (x86_cost_string, "bdver2") == 0)
+	ix86_cost = &bdver2_cost;
+      else if (strcmp (x86_cost_string, "bdver3") == 0)
+	ix86_cost = &bdver3_cost;
+      else if (strcmp (x86_cost_string, "bdver4") == 0)
+	ix86_cost = &bdver4_cost;
+      else if (strcmp (x86_cost_string, "btver1") == 0)
+	ix86_cost = &btver1_cost;
+      else if (strcmp (x86_cost_string, "btver2") == 0)
+	ix86_cost = &btver2_cost;
+      else if (strcmp (x86_cost_string, "pentium4") == 0)
+	ix86_cost = &pentium4_cost;
+      else if (strcmp (x86_cost_string, "nocona") == 0)
+	ix86_cost = &nocona_cost;
+      else if (strcmp (x86_cost_string, "atom") == 0)
+	ix86_cost = &atom_cost;
+      else if (strcmp (x86_cost_string, "slm") == 0)
+	ix86_cost = &slm_cost;
+      else if (strcmp (x86_cost_string, "generic") == 0)
+	ix86_cost = &generic_cost;
+      else if (strcmp (x86_cost_string, "core") == 0)
+	ix86_cost = &core_cost;
+      else
+	{
+	  /* Not one of the names above: treat the value as the base name
+	     of a shared object that provides the cost table.  */
+	  ix86_cost = load_cost_model (x86_cost_string);
+	  if (ix86_cost == NULL)
+	    abort ();
+	}
+    }
+  /* -mschedule=: set ix86_schedule by name.  */
+  if (x86_schedule_string)
+    {
+      if (strcmp (x86_schedule_string, "none") == 0)
+	ix86_schedule = CPU_NONE;
+      else if (strcmp (x86_schedule_string, "pentium") == 0)
+	ix86_schedule = CPU_PENTIUM;
+      else if (strcmp (x86_schedule_string, "pentiumpro") == 0)
+	ix86_schedule = CPU_PENTIUMPRO;
+      else if (strcmp (x86_schedule_string, "geode") == 0)
+	ix86_schedule = CPU_GEODE;
+      else if (strcmp (x86_schedule_string, "k6") == 0)
+	ix86_schedule = CPU_K6;
+      else if (strcmp (x86_schedule_string, "athlon") == 0)
+	ix86_schedule = CPU_ATHLON;
+      else if (strcmp (x86_schedule_string, "k8") == 0)
+	ix86_schedule = CPU_K8;
+      else if (strcmp (x86_schedule_string, "core2") == 0)
+	ix86_schedule = CPU_CORE2;
+      /* The option spelling "corei7" selects CPU_NEHALEM.  */
+      else if (strcmp (x86_schedule_string, "corei7") == 0)
+	ix86_schedule = CPU_NEHALEM;
+      else if (strcmp (x86_schedule_string, "atom") == 0)
+	ix86_schedule = CPU_ATOM;
+      else if (strcmp (x86_schedule_string, "slm") == 0)
+	ix86_schedule = CPU_SLM;
+      else if (strcmp (x86_schedule_string, "generic") == 0)
+	ix86_schedule = CPU_GENERIC;
+      else if (strcmp (x86_schedule_string, "amdfam10") == 0)
+	ix86_schedule = CPU_AMDFAM10;
+      else if (strcmp (x86_schedule_string, "bdver1") == 0)
+	ix86_schedule = CPU_BDVER1;
+      else if (strcmp (x86_schedule_string, "bdver2") == 0)
+	ix86_schedule = CPU_BDVER2;
+      else if (strcmp (x86_schedule_string, "bdver3") == 0)
+	ix86_schedule = CPU_BDVER3;
+      else if (strcmp (x86_schedule_string, "bdver4") == 0)
+	ix86_schedule = CPU_BDVER4;
+      else if (strcmp (x86_schedule_string, "btver2") == 0)
+	ix86_schedule = CPU_BTVER2;
+      else
+	abort ();
+    }
+}
diff --git a/gcc/config/i386/i386-tune.h b/gcc/config/i386/i386-tune.h
new file mode 100644
index 0000000..50ab949
--- /dev/null
+++ b/gcc/config/i386/i386-tune.h
@@ -0,0 +1,19 @@
+/* Tuning options:
+
+   -mcost=[size|i386|i486|pentium|pentiumpro|geode|k6|athlon|k8|amdfam10|bdver1|bdver2|bdver3|bdver4|btver1|btver2|pentium4|nocona|atom|slm|generic|core]
+     Any other value is taken as the base name of a shared object
+     (VALUE.so) from which the cost table is loaded.
+
+   -mschedule=[none|pentium|pentiumpro|geode|k6|athlon|k8|core2|corei7|atom|slm|generic|amdfam10|bdver1|bdver2|bdver3|bdver4|btver2]
+
+   -missue-rate=NUMBER
+
+   -madjust-cost=NUMBER
+
+   -mmultipass-dfa-lookahead=NUMBER
+
+   NOTE(review): i386-tune.opt defines only -mcost= and -mschedule=;
+   confirm where the last three options, and the three variables
+   declared at the end of this file, are defined.  */
+
+/* Apply the -mcost= and -mschedule= overrides.  */
+extern void x86_tune_options (void);
+
+extern int ix86_issue_rate_value;
+
+extern int ix86_adjust_cost_value;
+
+extern int ia32_multipass_dfa_lookahead_value;
diff --git a/gcc/config/i386/i386-tune.opt b/gcc/config/i386/i386-tune.opt
new file mode 100644
index 0000000..d1bcc88
--- /dev/null
+++ b/gcc/config/i386/i386-tune.opt
@@ -0,0 +1,7 @@
+; Tuning options for i386.
+
+mcost=
+Target RejectNegative Joined Report Var(x86_cost_string) Undocumented
+
+mschedule=
+Target RejectNegative Joined Report Var(x86_schedule_string) Undocumented
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 54ee6f3..3eae830 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3917,7 +3917,7 @@ ix86_option_override_internal (bool main_args_p,
    ix86_tune = processor_alias_table[i].processor;
    if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
      {
-	    if (!(processor_alias_table[i].flags & PTA_64BIT))
+	    if (0 && !(processor_alias_table[i].flags & PTA_64BIT))
          {
    	if (ix86_tune_defaulted)
    	  {
@@ -4003,6 +4003,8 @@ ix86_option_override_internal (bool main_args_p,
   else
     ix86_cost = ix86_tune_cost;
+  x86_tune_options ();
+
   /* Arrange to set up i386_stack_locals for all functions.  */
   init_machine_status = ix86_init_machine_status;
@@ -52679,6 +52681,8 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
+#include "config/i386/i386-tune.c"
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 74334ff..8fc5606 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2577,3 +2577,5 @@ Local variables:
 version-control: t
 End:
 */
+
+#include "config/i386/i386-tune.h"
-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.