I've just submitted a merge request for the vldN and vstN intrinsic improvements. There are five related patches, so I thought it might be easier to review the merge if I posted the individual changes here.
See:
http://www.mail-archive.com/linaro-toolchain@lists.linaro.org/msg00969.html
for an example of how this helps.
Richard
This first patch optimises the output for vld3q and vld4q functions. These functions expand into two individual vld3 and vld4 instructions, with each instruction setting one (interleaved) half of the output register. The problem was that both instructions treated the output register as an input, whereas only the second one needs to. We therefore treated the output register as being live before the vldNq and generated unnecessary spill code.
E.g.:
#include <arm_neon.h>
void foo (uint32_t *a, uint32_t *b, uint32_t *c) { uint32x4x3_t x, y;
x = vld3q_u32 (a); y = vld3q_u32 (b); x.val[0] = vaddq_u32 (x.val[0], y.val[0]); x.val[1] = vaddq_u32 (x.val[1], y.val[1]); x.val[2] = vaddq_u32 (x.val[2], y.val[2]); vst3q_u32 (a, x); }
gave:
stmfd sp!, {r3, fp} ldr r2, .L2 add fp, sp, #4 vldmia r2, {d16-d21} sub sp, sp, #112 vmov q11, q8 @ ti vmov q12, q9 @ ti vmov q13, q10 @ ti ...
where the vldmia is loading the x and y "inputs" to the two vld3q_u32s from the corresponding stack slots.
The patch is a backport of:
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01634.html
which has been applied to 4.7. No changes were needed for 4.5.
Richard
gcc/ Backport from mainline:
2011-03-30 Richard Sandiford <richard.sandiford@linaro.org> Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/43590 * config/arm/neon.md (neon_vld3qa<mode>, neon_vld4qa<mode>): Remove operand 1 and reshuffle the operands to match. (neon_vld3<mode>, neon_vld4<mode>): Update accordingly.
Index: gcc/config/arm/neon.md =================================================================== --- gcc/config/arm/neon.md 2011-04-19 13:55:04.000000000 +0000 +++ gcc/config/arm/neon.md 2011-04-19 13:55:04.000000000 +0000 @@ -4925,8 +4925,7 @@ (define_expand "neon_vld3<mode>" (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[0], - operands[1], operands[1])); + emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[1], operands[1])); emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0], operands[1], operands[1])); DONE; @@ -4934,12 +4933,11 @@ (define_expand "neon_vld3<mode>"
(define_insn "neon_vld3qa<mode>" [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:CI 1 "s_register_operand" "0") + (unspec:CI [(mem:CI (match_operand:SI 2 "s_register_operand" "1")) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3A)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) + (set (match_operand:SI 1 "s_register_operand" "=r") + (plus:SI (match_dup 2) (const_int 24)))] "TARGET_NEON" { @@ -4948,7 +4946,7 @@ (define_insn "neon_vld3qa<mode>" ops[0] = gen_rtx_REG (DImode, regno); ops[1] = gen_rtx_REG (DImode, regno + 4); ops[2] = gen_rtx_REG (DImode, regno + 8); - ops[3] = operands[2]; + ops[3] = operands[1]; output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); return ""; } @@ -5217,8 +5215,7 @@ (define_expand "neon_vld4<mode>" (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[0], - operands[1], operands[1])); + emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[1], operands[1])); emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0], operands[1], operands[1])); DONE; @@ -5226,12 +5223,11 @@ (define_expand "neon_vld4<mode>"
(define_insn "neon_vld4qa<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:XI 1 "s_register_operand" "0") + (unspec:XI [(mem:XI (match_operand:SI 2 "s_register_operand" "1")) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4A)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) + (set (match_operand:SI 1 "s_register_operand" "=r") + (plus:SI (match_dup 2) (const_int 32)))] "TARGET_NEON" { @@ -5241,7 +5237,7 @@ (define_insn "neon_vld4qa<mode>" ops[1] = gen_rtx_REG (DImode, regno + 4); ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = gen_rtx_REG (DImode, regno + 12); - ops[4] = operands[2]; + ops[4] = operands[1]; output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); return ""; }
The patterns for the Neon vld and vst intrinsics used the following sort of construct to refer to memory:
(mem:FOO (match_operand:SI X "register_operand" "r"))
This patch changes them to use:
(match_operand:FOO' X "neon_struct_operand" "(=)Um")
instead. This allows the loads and stores to use post-increment addresses as well as bare registers, and also matches the form that the vec_load_lanes and vec_store_lanes optabs need. (Those optabs will be in a later autovectorisation merge.)
The patch is a backport of:
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01996.html
which has been applied to 4.7. There are three differences in the 4.5 version:
* Our 4.5 code prints alignments as "[rN, :ALIGN]" rather than "[rN:ALIGN]". I've fixed that here. The initial commit to FSF trunk used the correct form, so there isn't a separate fix that could be backported.
* 4.5 doesn't have MEM_REF, so neon_dereference_pointer uses an INDIRECT_REF instead.
* 4.5 defines the mode attributes in neon.md rather than in a separate iterators.md.
Richard
gcc/ Backport from mainline:
2011-04-12 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/arm.c (arm_print_operand): Use MEM_SIZE to get the size of a '%A' memory reference. (T_DREG, T_QREG): New neon_builtin_type_bits. (arm_init_neon_builtins): Assert that the load and store operands are neon_struct_operands. (locate_neon_builtin_icode): Provide the neon_builtin_type_bits. (NEON_ARG_MEMORY): New builtin_arg. (neon_dereference_pointer): New function. (arm_expand_neon_args): Add a neon_builtin_type_bits argument. Handle NEON_ARG_MEMORY. (arm_expand_neon_builtin): Update after above interface changes. Use NEON_ARG_MEMORY for loads and stores. * config/arm/predicates.md (neon_struct_operand): New predicate. * config/arm/neon.md (V_two_elem): Tweak formatting. (V_three_elem): Use BLKmode for accesses that have no associated mode. (neon_vld1<mode>, neon_vld1_dup<mode>) (neon_vst1_lane<mode>, neon_vst1<mode>, neon_vld2<mode>) (neon_vld2_lane<mode>, neon_vld2_dup<mode>, neon_vst2<mode>) (neon_vst2_lane<mode>, neon_vld3<mode>, neon_vld3_lane<mode>) (neon_vld3_dup<mode>, neon_vst3<mode>, neon_vst3_lane<mode>) (neon_vld4<mode>, neon_vld4_lane<mode>, neon_vld4_dup<mode>) (neon_vst4<mode>): Replace pointer operand with a memory operand. Use %A in the output template. (neon_vld3qa<mode>, neon_vld3qb<mode>, neon_vst3qa<mode>) (neon_vst3qb<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>) (neon_vst4qa<mode>, neon_vst4qb<mode>): Likewise, but halve the width of the memory access. Remove post-increment. * config/arm/neon-testgen.ml: Allow addresses to have an alignment.
gcc/testsuite/ Backport from mainline:
2011-04-12 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.target/arm/neon-vld3-1.c: New test. * gcc.target/arm/neon-vst3-1.c: New test. * gcc.target/arm/neon/v*.c: Regenerate.
Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/arm.c 2011-04-20 09:32:44.000000000 +0000 @@ -16847,7 +16847,7 @@ arm_print_operand (FILE *stream, rtx x, { rtx addr; bool postinc = FALSE; - unsigned align, modesize, align_bits; + unsigned align, memsize, align_bits;
gcc_assert (GET_CODE (x) == MEM); addr = XEXP (x, 0); @@ -16862,12 +16862,12 @@ arm_print_operand (FILE *stream, rtx x, instruction (for some alignments) as an aid to the memory subsystem of the target. */ align = MEM_ALIGN (x) >> 3; - modesize = GET_MODE_SIZE (GET_MODE (x)); + memsize = INTVAL (MEM_SIZE (x)); /* Only certain alignment specifiers are supported by the hardware. */ - if (modesize == 16 && (align % 32) == 0) + if (memsize == 16 && (align % 32) == 0) align_bits = 256; - else if ((modesize == 8 || modesize == 16) && (align % 16) == 0) + else if ((memsize == 8 || memsize == 16) && (align % 16) == 0) align_bits = 128; else if ((align % 8) == 0) align_bits = 64; @@ -16875,7 +16875,7 @@ arm_print_operand (FILE *stream, rtx x, align_bits = 0; if (align_bits != 0) - asm_fprintf (stream, ", :%d", align_bits); + asm_fprintf (stream, ":%d", align_bits);
asm_fprintf (stream, "]");
@@ -18398,12 +18398,14 @@ enum neon_builtin_type_bits { T_V2SI = 0x0004, T_V2SF = 0x0008, T_DI = 0x0010, + T_DREG = 0x001F, T_V16QI = 0x0020, T_V8HI = 0x0040, T_V4SI = 0x0080, T_V4SF = 0x0100, T_V2DI = 0x0200, T_TI = 0x0400, + T_QREG = 0x07E0, T_EI = 0x0800, T_OI = 0x1000 }; @@ -19049,10 +19051,9 @@ arm_init_neon_builtins (void) if (is_load && k == 1) { /* Neon load patterns always have the memory operand - (a SImode pointer) in the operand 1 position. We - want a const pointer to the element type in that - position. */ - gcc_assert (insn_data[icode].operand[k].mode == SImode); + in the operand 1 position. */ + gcc_assert (insn_data[icode].operand[k].predicate + == neon_struct_operand);
switch (1 << j) { @@ -19087,10 +19088,9 @@ arm_init_neon_builtins (void) else if (is_store && k == 0) { /* Similarly, Neon store patterns use operand 0 as - the memory location to store to (a SImode pointer). - Use a pointer to the element type of the store in - that position. */ - gcc_assert (insn_data[icode].operand[k].mode == SImode); + the memory location to store to. */ + gcc_assert (insn_data[icode].operand[k].predicate + == neon_struct_operand);
switch (1 << j) { @@ -19410,10 +19410,11 @@ neon_builtin_compare (const void *a, con }
static enum insn_code -locate_neon_builtin_icode (int fcode, neon_itype *itype) +locate_neon_builtin_icode (int fcode, neon_itype *itype, + enum neon_builtin_type_bits *type_bit) { neon_builtin_datum key, *found; - int idx; + int idx, type, ntypes;
key.base_fcode = fcode; found = (neon_builtin_datum *) @@ -19426,20 +19427,83 @@ locate_neon_builtin_icode (int fcode, ne if (itype) *itype = found->itype;
+ if (type_bit) + { + ntypes = 0; + for (type = 0; type < T_MAX; type++) + if (found->bits & (1 << type)) + { + if (ntypes == idx) + break; + ntypes++; + } + gcc_assert (type < T_MAX); + *type_bit = (enum neon_builtin_type_bits) (1 << type); + } return found->codes[idx]; }
typedef enum { NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_MEMORY, NEON_ARG_STOP } builtin_arg;
#define NEON_MAX_BUILTIN_ARGS 5
+/* EXP is a pointer argument to a Neon load or store intrinsic. Derive + and return an expression for the accessed memory. + + The intrinsic function operates on a block of registers that has + mode REG_MODE. This block contains vectors of type TYPE_BIT. + The function references the memory at EXP in mode MEM_MODE; + this mode may be BLKmode if no more suitable mode is available. */ + +static tree +neon_dereference_pointer (tree exp, enum machine_mode mem_mode, + enum machine_mode reg_mode, + enum neon_builtin_type_bits type_bit) +{ + HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; + tree elem_type, upper_bound, array_type; + + /* Work out the size of the register block in bytes. */ + reg_size = GET_MODE_SIZE (reg_mode); + + /* Work out the size of each vector in bytes. */ + gcc_assert (type_bit & (T_DREG | T_QREG)); + vector_size = (type_bit & T_QREG ? 16 : 8); + + /* Work out how many vectors there are. */ + gcc_assert (reg_size % vector_size == 0); + nvectors = reg_size / vector_size; + + /* Work out how many elements are being loaded or stored. + MEM_MODE == REG_MODE implies a one-to-one mapping between register + and memory elements; anything else implies a lane load or store. */ + if (mem_mode == reg_mode) + nelems = vector_size * nvectors; + else + nelems = nvectors; + + /* Work out the type of each element. */ + gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp))); + elem_type = TREE_TYPE (TREE_TYPE (exp)); + + /* Create a type that describes the full access. */ + upper_bound = build_int_cst (size_type_node, nelems - 1); + array_type = build_array_type (elem_type, build_index_type (upper_bound)); + + /* Dereference EXP using that type. */ + exp = convert (build_pointer_type (array_type), exp); + return fold_build1 (INDIRECT_REF, array_type, exp); +} + /* Expand a Neon builtin. */ static rtx arm_expand_neon_args (rtx target, int icode, int have_retval, + enum neon_builtin_type_bits type_bit, tree exp, ...) 
{ va_list ap; @@ -19448,7 +19512,9 @@ arm_expand_neon_args (rtx target, int ic rtx op[NEON_MAX_BUILTIN_ARGS]; enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; + enum machine_mode other_mode; int argc = 0; + int opno;
if (have_retval && (!target @@ -19466,26 +19532,46 @@ arm_expand_neon_args (rtx target, int ic break; else { + opno = argc + have_retval; + mode[argc] = insn_data[icode].operand[opno].mode; arg[argc] = CALL_EXPR_ARG (exp, argc); + if (thisarg == NEON_ARG_MEMORY) + { + other_mode = insn_data[icode].operand[1 - opno].mode; + arg[argc] = neon_dereference_pointer (arg[argc], mode[argc], + other_mode, type_bit); + } op[argc] = expand_normal (arg[argc]); - mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
switch (thisarg) { case NEON_ARG_COPY_TO_REG: /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ - if (!(*insn_data[icode].operand[argc + have_retval].predicate) + if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) op[argc] = copy_to_mode_reg (mode[argc], op[argc]); break;
case NEON_ARG_CONSTANT: /* FIXME: This error message is somewhat unhelpful. */ - if (!(*insn_data[icode].operand[argc + have_retval].predicate) + if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) error ("argument must be a constant"); break;
+ case NEON_ARG_MEMORY: + gcc_assert (MEM_P (op[argc])); + PUT_MODE (op[argc], mode[argc]); + /* ??? arm_neon.h uses the same built-in functions for signed + and unsigned accesses, casting where necessary. This isn't + alias safe. */ + set_mem_alias_set (op[argc], 0); + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = (replace_equiv_address + (op[argc], force_reg (Pmode, XEXP (op[argc], 0)))); + break; + case NEON_ARG_STOP: gcc_unreachable (); } @@ -19564,14 +19650,15 @@ arm_expand_neon_args (rtx target, int ic arm_expand_neon_builtin (int fcode, tree exp, rtx target) { neon_itype itype; - enum insn_code icode = locate_neon_builtin_icode (fcode, &itype); + enum neon_builtin_type_bits type_bit; + enum insn_code icode = locate_neon_builtin_icode (fcode, &itype, &type_bit);
switch (itype) { case NEON_UNOP: case NEON_CONVERT: case NEON_DUPLANE: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_BINOP: @@ -19581,90 +19668,90 @@ arm_expand_neon_builtin (int fcode, tree case NEON_SCALARMULH: case NEON_SHIFTINSERT: case NEON_LOGICBINOP: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_TERNOP: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_GETLANE: case NEON_FIXCONV: case NEON_SHIFTIMM: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_CREATE: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_DUP: case NEON_SPLIT: case NEON_REINTERP: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_COMBINE: case NEON_VTBL: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_RESULTPAIR: - return arm_expand_neon_args (target, icode, 0, exp, + return arm_expand_neon_args (target, icode, 0, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_LANEMUL: case NEON_LANEMULL: case NEON_LANEMULH: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_LANEMAC: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SHIFTACC: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SCALARMAC: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SELECT: case NEON_VTBX: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_LOAD1: case NEON_LOADSTRUCT: - return arm_expand_neon_args (target, icode, 1, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_STOP);
case NEON_LOAD1LANE: case NEON_LOADSTRUCTLANE: - return arm_expand_neon_args (target, icode, 1, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_STORE1: case NEON_STORESTRUCT: - return arm_expand_neon_args (target, icode, 0, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + return arm_expand_neon_args (target, icode, 0, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_STORE1LANE: case NEON_STORESTRUCTLANE: - return arm_expand_neon_args (target, icode, 0, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + return arm_expand_neon_args (target, icode, 0, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); }
Index: gcc/config/arm/predicates.md =================================================================== --- gcc/config/arm/predicates.md 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/predicates.md 2011-04-20 08:29:52.000000000 +0000 @@ -681,3 +681,7 @@ (define_special_predicate "vect_par_cons } return true; }) + +(define_special_predicate "neon_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) Index: gcc/config/arm/neon.md =================================================================== --- gcc/config/arm/neon.md 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/neon.md 2011-04-20 08:29:52.000000000 +0000 @@ -259,20 +259,18 @@ (define_mode_attr V_ext [(V8QI "SI") (V1
;; Mode of pair of elements for each vector mode, to define transfer ;; size for structure lane/dup loads and stores. -(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") - (V4HI "SI") (V8HI "SI") +(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") + (V4HI "SI") (V8HI "SI") (V2SI "V2SI") (V4SI "V2SI") (V2SF "V2SF") (V4SF "V2SF") (DI "V2DI") (V2DI "V2DI")])
;; Similar, for three elements. -;; ??? Should we define extra modes so that sizes of all three-element -;; accesses can be accurately represented? -(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI") - (V4HI "V4HI") (V8HI "V4HI") - (V2SI "V4SI") (V4SI "V4SI") - (V2SF "V4SF") (V4SF "V4SF") - (DI "EI") (V2DI "EI")]) +(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") + (V4HI "BLK") (V8HI "BLK") + (V2SI "BLK") (V4SI "BLK") + (V2SF "BLK") (V4SF "BLK") + (DI "EI") (V2DI "EI")])
;; Similar, for four elements. (define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") @@ -4567,16 +4565,16 @@ (define_expand "neon_vreinterpretv2di<mo
(define_insn "neon_vld1<mode>" [(set (match_operand:VDQX 0 "s_register_operand" "=w") - (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))] + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] UNSPEC_VLD1))] "TARGET_NEON" - "vld1.<V_sz_elem>\t%h0, [%1]" + "vld1.<V_sz_elem>\t%h0, %A1" [(set_attr "neon_type" "neon_vld1_1_2_regs")] )
(define_insn "neon_vld1_lane<mode>" [(set (match_operand:VDX 0 "s_register_operand" "=w") - (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") (match_operand:VDX 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VLD1_LANE))] @@ -4587,9 +4585,9 @@ (define_insn "neon_vld1_lane<mode>" if (lane < 0 || lane >= max) error ("lane out of range"); if (max == 1) - return "vld1.<V_sz_elem>\t%P0, [%1]"; + return "vld1.<V_sz_elem>\t%P0, %A1"; else - return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; + return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) @@ -4599,7 +4597,7 @@ (define_insn "neon_vld1_lane<mode>"
(define_insn "neon_vld1_lane<mode>" [(set (match_operand:VQX 0 "s_register_operand" "=w") - (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") (match_operand:VQX 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VLD1_LANE))] @@ -4618,9 +4616,9 @@ (define_insn "neon_vld1_lane<mode>" } operands[0] = gen_rtx_REG (<V_HALF>mode, regno); if (max == 2) - return "vld1.<V_sz_elem>\t%P0, [%1]"; + return "vld1.<V_sz_elem>\t%P0, %A1"; else - return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; + return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) @@ -4630,14 +4628,14 @@ (define_insn "neon_vld1_lane<mode>"
(define_insn "neon_vld1_dup<mode>" [(set (match_operand:VDX 0 "s_register_operand" "=w") - (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] + (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] UNSPEC_VLD1_DUP))] "TARGET_NEON" { if (GET_MODE_NUNITS (<MODE>mode) > 1) - return "vld1.<V_sz_elem>\t{%P0[]}, [%1]"; + return "vld1.<V_sz_elem>\t{%P0[]}, %A1"; else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -4647,14 +4645,14 @@ (define_insn "neon_vld1_dup<mode>"
(define_insn "neon_vld1_dup<mode>" [(set (match_operand:VQX 0 "s_register_operand" "=w") - (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] + (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] UNSPEC_VLD1_DUP))] "TARGET_NEON" { if (GET_MODE_NUNITS (<MODE>mode) > 2) - return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; + return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -4663,15 +4661,15 @@ (define_insn "neon_vld1_dup<mode>" )
(define_insn "neon_vst1<mode>" - [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] UNSPEC_VST1))] "TARGET_NEON" - "vst1.<V_sz_elem>\t%h1, [%0]" + "vst1.<V_sz_elem>\t%h1, %A0" [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
(define_insn "neon_vst1_lane<mode>" - [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") (vec_select:<V_elem> (match_operand:VDX 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] @@ -4682,9 +4680,9 @@ (define_insn "neon_vst1_lane<mode>" if (lane < 0 || lane >= max) error ("lane out of range"); if (max == 1) - return "vst1.<V_sz_elem>\t{%P1}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1}, %A0"; else - return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1)) @@ -4692,7 +4690,7 @@ (define_insn "neon_vst1_lane<mode>" (const_string "neon_vst1_vst2_lane")))])
(define_insn "neon_vst1_lane<mode>" - [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") (vec_select:<V_elem> (match_operand:VQX 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] @@ -4711,24 +4709,24 @@ (define_insn "neon_vst1_lane<mode>" } operands[1] = gen_rtx_REG (<V_HALF>mode, regno); if (max == 2) - return "vst1.<V_sz_elem>\t{%P1}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1}, %A0"; else - return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; } [(set_attr "neon_type" "neon_vst1_vst2_lane")] )
(define_insn "neon_vld2<mode>" [(set (match_operand:TI 0 "s_register_operand" "=w") - (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vld1.64\t%h0, [%1]"; + return "vld1.64\t%h0, %A1"; else - return "vld2.<V_sz_elem>\t%h0, [%1]"; + return "vld2.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -4738,16 +4736,16 @@ (define_insn "neon_vld2<mode>"
(define_insn "neon_vld2<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON" - "vld2.<V_sz_elem>\t%h0, [%1]" + "vld2.<V_sz_elem>\t%h0, %A1" [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")])
(define_insn "neon_vld2_lane<mode>" [(set (match_operand:TI 0 "s_register_operand" "=w") - (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") (match_operand:TI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -4764,7 +4762,7 @@ (define_insn "neon_vld2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = operands[1]; ops[3] = operands[3]; - output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); + output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } [(set_attr "neon_type" "neon_vld1_vld2_lane")] @@ -4772,7 +4770,7 @@ (define_insn "neon_vld2_lane<mode>"
(define_insn "neon_vld2_lane<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -4794,7 +4792,7 @@ (define_insn "neon_vld2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno + 4); ops[2] = operands[1]; ops[3] = GEN_INT (lane); - output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); + output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } [(set_attr "neon_type" "neon_vld1_vld2_lane")] @@ -4802,15 +4800,15 @@ (define_insn "neon_vld2_lane<mode>"
(define_insn "neon_vld2_dup<mode>" [(set (match_operand:TI 0 "s_register_operand" "=w") - (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2_DUP))] "TARGET_NEON" { if (GET_MODE_NUNITS (<MODE>mode) > 1) - return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; + return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -4819,16 +4817,16 @@ (define_insn "neon_vld2_dup<mode>" )
(define_insn "neon_vst2<mode>" - [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vst1.64\t%h1, [%0]"; + return "vst1.64\t%h1, %A0"; else - return "vst2.<V_sz_elem>\t%h1, [%0]"; + return "vst2.<V_sz_elem>\t%h1, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -4837,17 +4835,17 @@ (define_insn "neon_vst2<mode>" )
(define_insn "neon_vst2<mode>" - [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON" - "vst2.<V_sz_elem>\t%h1, [%0]" + "vst2.<V_sz_elem>\t%h1, %A0" [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] )
(define_insn "neon_vst2_lane<mode>" - [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_two_elem> [(match_operand:TI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -4865,14 +4863,14 @@ (define_insn "neon_vst2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 2); ops[3] = operands[2]; - output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); + output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst1_vst2_lane")] )
(define_insn "neon_vst2_lane<mode>" - [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_two_elem> [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -4895,7 +4893,7 @@ (define_insn "neon_vst2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = GEN_INT (lane); - output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); + output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst1_vst2_lane")] @@ -4903,15 +4901,15 @@ (define_insn "neon_vst2_lane<mode>"
(define_insn "neon_vld3<mode>" [(set (match_operand:EI 0 "s_register_operand" "=w") - (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vld1.64\t%h0, [%1]"; + return "vld1.64\t%h0, %A1"; else - return "vld3.<V_sz_elem>\t%h0, [%1]"; + return "vld3.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -4920,25 +4918,25 @@ (define_insn "neon_vld3<mode>" )
(define_expand "neon_vld3<mode>" - [(match_operand:CI 0 "s_register_operand" "=w") - (match_operand:SI 1 "s_register_operand" "+r") + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[1], operands[1])); - emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0], - operands[1], operands[1])); + rtx mem; + + mem = adjust_address (operands[1], EImode, 0); + emit_insn (gen_neon_vld3qa<mode> (operands[0], mem)); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0])); DONE; })
(define_insn "neon_vld3qa<mode>" [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(mem:CI (match_operand:SI 2 "s_register_operand" "1")) + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD3A)) - (set (match_operand:SI 1 "s_register_operand" "=r") - (plus:SI (match_dup 2) - (const_int 24)))] + UNSPEC_VLD3A))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -4947,7 +4945,7 @@ (define_insn "neon_vld3qa<mode>" ops[1] = gen_rtx_REG (DImode, regno + 4); ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = operands[1]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); + output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -4955,13 +4953,10 @@ (define_insn "neon_vld3qa<mode>"
(define_insn "neon_vld3qb<mode>" [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:CI 1 "s_register_operand" "0") + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD3B)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) - (const_int 24)))] + UNSPEC_VLD3B))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -4969,8 +4964,8 @@ (define_insn "neon_vld3qb<mode>" ops[0] = gen_rtx_REG (DImode, regno + 2); ops[1] = gen_rtx_REG (DImode, regno + 6); ops[2] = gen_rtx_REG (DImode, regno + 10); - ops[3] = operands[2]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); + ops[3] = operands[1]; + output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -4978,7 +4973,7 @@ (define_insn "neon_vld3qb<mode>"
(define_insn "neon_vld3_lane<mode>" [(set (match_operand:EI 0 "s_register_operand" "=w") - (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") (match_operand:EI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -4996,7 +4991,7 @@ (define_insn "neon_vld3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = operands[1]; ops[4] = operands[3]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", ops); return ""; } @@ -5005,7 +5000,7 @@ (define_insn "neon_vld3_lane<mode>"
(define_insn "neon_vld3_lane<mode>" [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") (match_operand:CI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -5028,7 +5023,7 @@ (define_insn "neon_vld3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = operands[1]; ops[4] = GEN_INT (lane); - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", ops); return ""; } @@ -5037,7 +5032,7 @@ (define_insn "neon_vld3_lane<mode>"
(define_insn "neon_vld3_dup<mode>" [(set (match_operand:EI 0 "s_register_operand" "=w") - (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3_DUP))] "TARGET_NEON" @@ -5050,11 +5045,11 @@ (define_insn "neon_vld3_dup<mode>" ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = operands[1]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, [%3]", ops); + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %A3", ops); return ""; } else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -5062,16 +5057,16 @@ (define_insn "neon_vld3_dup<mode>" (const_string "neon_vld1_1_2_regs")))])
(define_insn "neon_vst3<mode>" - [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vst1.64\t%h1, [%0]"; + return "vst1.64\t%h1, %A0"; else - return "vst3.<V_sz_elem>\t%h1, [%0]"; + return "vst3.<V_sz_elem>\t%h1, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -5079,62 +5074,60 @@ (define_insn "neon_vst3<mode>" (const_string "neon_vst2_4_regs_vst3_vst4")))])
(define_expand "neon_vst3<mode>" - [(match_operand:SI 0 "s_register_operand" "+r") - (match_operand:CI 1 "s_register_operand" "w") + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vst3qa<mode> (operands[0], operands[0], operands[1])); - emit_insn (gen_neon_vst3qb<mode> (operands[0], operands[0], operands[1])); + rtx mem; + + mem = adjust_address (operands[0], EImode, 0); + emit_insn (gen_neon_vst3qa<mode> (mem, operands[1])); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vst3qb<mode> (mem, operands[1])); DONE; })
(define_insn "neon_vst3qa<mode>" - [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST3A)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 24)))] + UNSPEC_VST3A))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[4]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 8); - output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); + output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst3qb<mode>" - [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST3B)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 24)))] + UNSPEC_VST3B))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[4]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 6); ops[3] = gen_rtx_REG (DImode, regno + 10); - output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); + output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst3_lane<mode>" - [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_three_elem> [(match_operand:EI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5153,7 +5146,7 @@ (define_insn "neon_vst3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 2); ops[3] = gen_rtx_REG (DImode, regno + 4); ops[4] = operands[2]; - output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", + output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", ops); return ""; } @@ -5161,7 +5154,7 @@ (define_insn "neon_vst3_lane<mode>" )
(define_insn "neon_vst3_lane<mode>" - [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_three_elem> [(match_operand:CI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5185,7 +5178,7 @@ (define_insn "neon_vst3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 8); ops[4] = GEN_INT (lane); - output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", + output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", ops); return ""; } @@ -5193,15 +5186,15 @@ (define_insn "neon_vst3_lane<mode>"
(define_insn "neon_vld4<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vld1.64\t%h0, [%1]"; + return "vld1.64\t%h0, %A1"; else - return "vld4.<V_sz_elem>\t%h0, [%1]"; + return "vld4.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -5210,25 +5203,25 @@ (define_insn "neon_vld4<mode>" )
(define_expand "neon_vld4<mode>" - [(match_operand:XI 0 "s_register_operand" "=w") - (match_operand:SI 1 "s_register_operand" "+r") + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[1], operands[1])); - emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0], - operands[1], operands[1])); + rtx mem; + + mem = adjust_address (operands[1], OImode, 0); + emit_insn (gen_neon_vld4qa<mode> (operands[0], mem)); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0])); DONE; })
(define_insn "neon_vld4qa<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:XI (match_operand:SI 2 "s_register_operand" "1")) + (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD4A)) - (set (match_operand:SI 1 "s_register_operand" "=r") - (plus:SI (match_dup 2) - (const_int 32)))] + UNSPEC_VLD4A))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -5238,7 +5231,7 @@ (define_insn "neon_vld4qa<mode>" ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = gen_rtx_REG (DImode, regno + 12); ops[4] = operands[1]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); + output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -5246,13 +5239,10 @@ (define_insn "neon_vld4qa<mode>"
(define_insn "neon_vld4qb<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:XI 1 "s_register_operand" "0") + (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD4B)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) - (const_int 32)))] + UNSPEC_VLD4B))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -5261,8 +5251,8 @@ (define_insn "neon_vld4qb<mode>" ops[1] = gen_rtx_REG (DImode, regno + 6); ops[2] = gen_rtx_REG (DImode, regno + 10); ops[3] = gen_rtx_REG (DImode, regno + 14); - ops[4] = operands[2]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); + ops[4] = operands[1]; + output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -5270,7 +5260,7 @@ (define_insn "neon_vld4qb<mode>"
(define_insn "neon_vld4_lane<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -5289,7 +5279,7 @@ (define_insn "neon_vld4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 6); ops[4] = operands[1]; ops[5] = operands[3]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", + output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", ops); return ""; } @@ -5298,7 +5288,7 @@ (define_insn "neon_vld4_lane<mode>"
(define_insn "neon_vld4_lane<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") (match_operand:XI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -5322,7 +5312,7 @@ (define_insn "neon_vld4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 12); ops[4] = operands[1]; ops[5] = GEN_INT (lane); - output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", + output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", ops); return ""; } @@ -5331,7 +5321,7 @@ (define_insn "neon_vld4_lane<mode>"
(define_insn "neon_vld4_dup<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4_DUP))] "TARGET_NEON" @@ -5345,12 +5335,12 @@ (define_insn "neon_vld4_dup<mode>" ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 6); ops[4] = operands[1]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, [%4]", + output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops); return ""; } else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -5359,16 +5349,16 @@ (define_insn "neon_vld4_dup<mode>" )
(define_insn "neon_vst4<mode>" - [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vst1.64\t%h1, [%0]"; + return "vst1.64\t%h1, %A0"; else - return "vst4.<V_sz_elem>\t%h1, [%0]"; + return "vst4.<V_sz_elem>\t%h1, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -5377,64 +5367,62 @@ (define_insn "neon_vst4<mode>" )
(define_expand "neon_vst4<mode>" - [(match_operand:SI 0 "s_register_operand" "+r") - (match_operand:XI 1 "s_register_operand" "w") + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vst4qa<mode> (operands[0], operands[0], operands[1])); - emit_insn (gen_neon_vst4qb<mode> (operands[0], operands[0], operands[1])); + rtx mem; + + mem = adjust_address (operands[0], OImode, 0); + emit_insn (gen_neon_vst4qa<mode> (mem, operands[1])); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vst4qb<mode> (mem, operands[1])); DONE; })
(define_insn "neon_vst4qa<mode>" - [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST4A)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 32)))] + UNSPEC_VST4A))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[5]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 8); ops[4] = gen_rtx_REG (DImode, regno + 12); - output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); + output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst4qb<mode>" - [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST4B)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 32)))] + UNSPEC_VST4B))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[5]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 6); ops[3] = gen_rtx_REG (DImode, regno + 10); ops[4] = gen_rtx_REG (DImode, regno + 14); - output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); + output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst4_lane<mode>" - [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_four_elem> [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5454,7 +5442,7 @@ (define_insn "neon_vst4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 4); ops[4] = gen_rtx_REG (DImode, regno + 6); ops[5] = operands[2]; - output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", + output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", ops); return ""; } @@ -5462,7 +5450,7 @@ (define_insn "neon_vst4_lane<mode>" )
(define_insn "neon_vst4_lane<mode>" - [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_four_elem> [(match_operand:XI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5487,7 +5475,7 @@ (define_insn "neon_vst4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 8); ops[4] = gen_rtx_REG (DImode, regno + 12); ops[5] = GEN_INT (lane); - output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", + output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", ops); return ""; } Index: gcc/config/arm/neon-testgen.ml =================================================================== --- gcc/config/arm/neon-testgen.ml 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/neon-testgen.ml 2011-04-20 08:29:52.000000000 +0000 @@ -177,7 +177,7 @@ let rec analyze_shape shape = let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in "\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\}" | (PtrTo elt | CstPtrTo elt) -> - "\\\[" ^ (analyze_shape_elt elt) ^ "\\\]" + "\\\[" ^ (analyze_shape_elt elt) ^ "\(:\[0-9\]+\)?\\\]" | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\[\[0-9\]+\\\]" | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\[\[0-9\]+\\\]" | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\[\\\]" Index: gcc/testsuite/gcc.target/arm/neon-vld3-1.c =================================================================== --- /dev/null 2010-10-05 15:55:33.000000000 +0000 +++ gcc/testsuite/gcc.target/arm/neon-vld3-1.c 2011-04-20 08:29:52.000000000 +0000 @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +uint32_t buffer[12]; + +void __attribute__((noinline)) +foo (uint32_t *a) +{ + uint32x4x3_t x; + + x = vld3q_u32 (a); + x.val[0] = vaddq_u32 (x.val[0], x.val[1]); + vst3q_u32 (a, x); 
+} + +int +main (void) +{ + buffer[0] = 1; + buffer[1] = 2; + foo (buffer); + return buffer[0] != 3; +} Index: gcc/testsuite/gcc.target/arm/neon-vst3-1.c =================================================================== --- /dev/null 2010-10-05 15:55:33.000000000 +0000 +++ gcc/testsuite/gcc.target/arm/neon-vst3-1.c 2011-04-20 08:29:52.000000000 +0000 @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +uint32_t buffer[64]; + +void __attribute__((noinline)) +foo (uint32_t *a) +{ + uint32x4x3_t x; + + x = vld3q_u32 (a); + a[35] = 1; + vst3q_lane_u32 (a + 32, x, 1); +} + +int +main (void) +{ + foo (buffer); + return buffer[35] != 1; +} Index: gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c =================================================================== --- gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-04-20 08:29:45.000000000 +0000 +++ gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-04-20 08:29:52.000000000 +0000 @@ -15,5 +15,5 @@ void test_vld1Q_dupf32 (void) out_float32x4_t = vld1q_dup_f32 (0); }
-/* { dg-final { scan-assembler "vld1.32[ ]+\{(([dD][0-9]+\[\]-[dD][0-9]+\[\])|([dD][0-9]+\[\], [dD][0-9]+\[\]))\}, \[[rR][0-9]+\]!?([ ]+@.*)?\n" } } */ +/* { dg-final { scan-assembler "vld1.32[ ]+\{(([dD][0-9]+\[\]-[dD][0-9]+\[\])|([dD][0-9]+\[\], [dD][0-9]+\[\]))\}, \[[rR][0-9]+(:[0-9]+)?\]!?([ ]+@.*)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */
[...etc...]
This patch allows the target to override MAX_FIXED_MODE_SIZE for specific kinds of array. We can then give a non-BLK mode to things like uint32x2x4_t, which in turn allows them to be stored in registers.
The patch is a backport of:
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg02192.html
which Richard Guenther approved in principle, but which can't be applied yet because of patch 4/5 in this series. The only difference in the 4.5 version is that 4.5 still uses the old target hook definition scheme, rather than 4.7's target.def.
Richard
gcc/ * hooks.h (hook_bool_mode_uhwi_false): Declare. * hooks.c (hook_bool_mode_uhwi_false): New function. * doc/tm.texi (TARGET_ARRAY_MODE_SUPPORTED_P): Document. * target.h (array_mode_supported_p): New hook. * target-def.h (TARGET_ARRAY_MODE_SUPPORTED_P): Define if undefined. (TARGET_INITIALIZER): Include it. * stor-layout.c (mode_for_array): New function. (layout_type): Use it. * config/arm/arm.c (arm_array_mode_supported_p): New function. (TARGET_ARRAY_MODE_SUPPORTED_P): Define.
Index: gcc/hooks.h =================================================================== --- gcc/hooks.h 2011-04-19 14:14:01.000000000 +0000 +++ gcc/hooks.h 2011-04-19 16:19:06.000000000 +0000 @@ -32,6 +32,8 @@ extern bool hook_bool_const_int_const_in extern bool hook_bool_mode_false (enum machine_mode); extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx); extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx); +extern bool hook_bool_mode_uhwi_false (enum machine_mode, + unsigned HOST_WIDE_INT); extern bool hook_bool_tree_false (tree); extern bool hook_bool_const_tree_false (const_tree); extern bool hook_bool_tree_true (tree); Index: gcc/hooks.c =================================================================== --- gcc/hooks.c 2011-04-19 14:14:01.000000000 +0000 +++ gcc/hooks.c 2011-04-19 16:19:06.000000000 +0000 @@ -86,6 +86,15 @@ hook_bool_mode_const_rtx_true (enum mach return true; }
+/* Generic hook that takes (enum machine_mode, unsigned HOST_WIDE_INT) + and returns false. */ +bool +hook_bool_mode_uhwi_false (enum machine_mode mode ATTRIBUTE_UNUSED, + unsigned HOST_WIDE_INT value ATTRIBUTE_UNUSED) +{ + return false; +} + /* Generic hook that takes (FILE *, const char *) and does nothing. */ void hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED) Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi 2011-04-19 14:14:01.000000000 +0000 +++ gcc/doc/tm.texi 2011-04-19 16:38:08.000000000 +0000 @@ -4367,6 +4367,34 @@ insns involving vector mode @var{mode}. must have move patterns for this mode. @end deftypefn
+@deftypefn {Target Hook} bool TARGET_ARRAY_MODE_SUPPORTED_P (enum machine_mode @var{mode}, unsigned HOST_WIDE_INT @var{nelems}) +Return true if GCC should try to use a scalar mode to store an array +of @var{nelems} elements, given that each element has mode @var{mode}. +Returning true here overrides the usual @code{MAX_FIXED_MODE_SIZE} limit +and allows GCC to use any defined integer mode. + +One use of this hook is to support vector load and store operations +that operate on several homogeneous vectors. For example, ARM Neon +has operations like: + +@smallexample +int8x8x3_t vld3_s8 (const int8_t *) +@end smallexample + +where the return type is defined as: + +@smallexample +typedef struct int8x8x3_t +@{ + int8x8_t val[3]; +@} int8x8x3_t; +@end smallexample + +If this hook allows @code{val} to have a scalar mode, then +@code{int8x8x3_t} can have the same mode. GCC can then store +@code{int8x8x3_t}s in registers rather than forcing them onto the stack. +@end deftypefn + @node Scalar Return @subsection How Scalar Function Values Are Returned @cindex return values in registers Index: gcc/target.h =================================================================== --- gcc/target.h 2011-04-19 14:14:01.000000000 +0000 +++ gcc/target.h 2011-04-19 16:38:08.000000000 +0000 @@ -764,6 +764,9 @@ struct gcc_target for further details. */ bool (* vector_mode_supported_p) (enum machine_mode mode);
+ /* See tm.texi. */ + bool (* array_mode_supported_p) (enum machine_mode, unsigned HOST_WIDE_INT); + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ Index: gcc/target-def.h =================================================================== --- gcc/target-def.h 2011-04-19 14:14:01.000000000 +0000 +++ gcc/target-def.h 2011-04-19 16:38:08.000000000 +0000 @@ -553,6 +553,10 @@ #define TARGET_FIXED_POINT_SUPPORTED_P d #define TARGET_VECTOR_MODE_SUPPORTED_P hook_bool_mode_false #endif
+#ifndef TARGET_ARRAY_MODE_SUPPORTED_P +#define TARGET_ARRAY_MODE_SUPPORTED_P hook_bool_mode_uhwi_false +#endif + /* In hooks.c. */ #define TARGET_CANNOT_MODIFY_JUMPS_P hook_bool_void_false #define TARGET_BRANCH_TARGET_REGISTER_CLASS \ @@ -985,6 +989,7 @@ #define TARGET_INITIALIZER \ TARGET_ADDR_SPACE_HOOKS, \ TARGET_SCALAR_MODE_SUPPORTED_P, \ TARGET_VECTOR_MODE_SUPPORTED_P, \ + TARGET_ARRAY_MODE_SUPPORTED_P, \ TARGET_RTX_COSTS, \ TARGET_ADDRESS_COST, \ TARGET_ALLOCATE_INITIAL_VALUE, \ Index: gcc/stor-layout.c =================================================================== --- gcc/stor-layout.c 2011-04-19 14:14:01.000000000 +0000 +++ gcc/stor-layout.c 2011-04-19 14:14:03.000000000 +0000 @@ -507,6 +507,34 @@ get_mode_alignment (enum machine_mode mo return MIN (BIGGEST_ALIGNMENT, MAX (1, mode_base_align[mode]*BITS_PER_UNIT)); }
+/* Return the natural mode of an array, given that it is SIZE bytes in + total and has elements of type ELEM_TYPE. */ + +static enum machine_mode +mode_for_array (tree elem_type, tree size) +{ + tree elem_size; + unsigned HOST_WIDE_INT int_size, int_elem_size; + bool limit_p; + + /* One-element arrays get the component type's mode. */ + elem_size = TYPE_SIZE (elem_type); + if (simple_cst_equal (size, elem_size)) + return TYPE_MODE (elem_type); + + limit_p = true; + if (host_integerp (size, 1) && host_integerp (elem_size, 1)) + { + int_size = tree_low_cst (size, 1); + int_elem_size = tree_low_cst (elem_size, 1); + if (int_elem_size > 0 + && int_size % int_elem_size == 0 + && targetm.array_mode_supported_p (TYPE_MODE (elem_type), + int_size / int_elem_size)) + limit_p = false; + } + return mode_for_size_tree (size, MODE_INT, limit_p); +} /* Subroutine of layout_decl: Force alignment required for the data type. But if the decl itself wants greater alignment, don't override that. */ @@ -2044,14 +2072,8 @@ layout_type (tree type) && (TYPE_MODE (TREE_TYPE (type)) != BLKmode || TYPE_NO_FORCE_BLK (TREE_TYPE (type)))) { - /* One-element arrays get the component type's mode. 
*/ - if (simple_cst_equal (TYPE_SIZE (type), - TYPE_SIZE (TREE_TYPE (type)))) - SET_TYPE_MODE (type, TYPE_MODE (TREE_TYPE (type))); - else - SET_TYPE_MODE (type, mode_for_size_tree (TYPE_SIZE (type), - MODE_INT, 1)); - + SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type), + TYPE_SIZE (type))); if (TYPE_MODE (type) != BLKmode && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type))) Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c 2011-04-19 14:14:01.000000000 +0000 +++ gcc/config/arm/arm.c 2011-04-19 16:38:08.000000000 +0000 @@ -222,6 +222,8 @@ static const char *arm_invalid_return_ty static tree arm_promoted_type (const_tree t); static tree arm_convert_to_type (tree type, tree expr); static bool arm_scalar_mode_supported_p (enum machine_mode); +static bool arm_array_mode_supported_p (enum machine_mode, + unsigned HOST_WIDE_INT); static bool arm_frame_pointer_required (void); static bool arm_can_eliminate (const int, const int); static void arm_asm_trampoline_template (FILE *); @@ -355,6 +357,8 @@ #define TARGET_ADDRESS_COST arm_address_ #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p +#undef TARGET_ARRAY_MODE_SUPPORTED_P +#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg @@ -22435,6 +22439,20 @@ arm_vector_mode_supported_p (enum machin return true;
return false; +} + +/* Implements target hook array_mode_supported_p. */ + +static bool +arm_array_mode_supported_p (enum machine_mode mode, + unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_NEON + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && (nelems >= 2 && nelems <= 4)) + return true; + + return false; }
/* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
This patch converts LEGITIMATE_CONSTANT_P into a target hook and passes along the mode of the constant. This can then be used by patch 5/5.
The patch is a version of:
http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00195.html
which is still pending review after two pings. It seems pretty simple though, so I think it's worth backporting now rather than waiting for upstream approval.
The backport is very much a cut-down version. Rather than convert all targets to the new hook, I've kept LEGITIMATE_CONSTANT_P around and made it the default implementation of the new hook. Only ARM defines the hook directly.
Note that the ARM definition is supposed to be identical to the old LEGITIMATE_CONSTANT_P version. Only 5/5 is meant to change it.
Richard
gcc/ * doc/tm.texi (LEGITIMATE_CONSTANT_P): Replace with... (TARGET_LEGITIMATE_CONSTANT_P): ...this. * target.h (gcc_target): Add legitimate_constant_p. * target-def.h (TARGET_LEGITIMATE_CONSTANT_P): Define. (TARGET_INITIALIZER): Include it. * calls.c (precompute_register_parameters): Replace uses of LEGITIMATE_CONSTANT_P with targetm.legitimate_constant_p. (emit_library_call_value_1): Likewise. * expr.c (move_block_to_reg, can_store_by_pieces, emit_move_insn) (compress_float_constant, emit_push_insn, expand_expr_real_1): Likewise. * recog.c (general_operand, immediate_operand): Likewise. * reload.c (find_reloads_toplev, find_reloads_address_part): Likewise. * reload1.c (init_eliminable_invariants): Likewise. * targhooks.h (default_legitimate_constant_p): Declare. * targhooks.c (default_legitimate_constant_p): New function.
* config/arm/arm-protos.h (arm_cannot_force_const_mem): Delete. * config/arm/arm.h (ARM_LEGITIMATE_CONSTANT_P): Likewise. (THUMB_LEGITIMATE_CONSTANT_P, LEGITIMATE_CONSTANT_P): Likewise. * config/arm/arm.c (TARGET_LEGITIMATE_CONSTANT_P): Define. (arm_legitimate_constant_p_1, thumb_legitimate_constant_p) (arm_legitimate_constant_p): New functions. (arm_cannot_force_const_mem): Make static.
Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi 2011-04-19 16:38:08.000000000 +0000 +++ gcc/doc/tm.texi 2011-04-19 16:38:15.000000000 +0000 @@ -2642,8 +2642,8 @@ instruction for loading an immediate val register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when @var{x} is a floating-point constant. If the constant can't be loaded into any kind of register, code generation will be better if -@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead -of using @code{PREFERRED_RELOAD_CLASS}. +@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead +of using @code{TARGET_PREFERRED_RELOAD_CLASS}.
If an insn has pseudos in it after register allocation, reload will go through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} @@ -5628,13 +5628,13 @@ addresses. Many RISC machines have no m You may assume that @var{addr} is a valid address for the machine. @end defmac
-@defmac LEGITIMATE_CONSTANT_P (@var{x}) -A C expression that is nonzero if @var{x} is a legitimate constant for -an immediate operand on the target machine. You can assume that -@var{x} satisfies @code{CONSTANT_P}, so you need not check this. In fact, -@samp{1} is a suitable definition for this macro on machines where -anything @code{CONSTANT_P} is valid. -@end defmac +@deftypefn {Target Hook} bool TARGET_LEGITIMATE_CONSTANT_P (enum machine_mode @var{mode}, rtx @var{x}) +This hook returns true if @var{x} is a legitimate constant for a +@var{mode}-mode immediate operand on the target machine. You can assume that +@var{x} satisfies @code{CONSTANT_P}, so you need not check this. + +The default definition returns true. +@end deftypefn
@deftypefn {Target Hook} rtx TARGET_DELEGITIMIZE_ADDRESS (rtx @var{x}) This hook is used to undo the possibly obfuscating effects of the Index: gcc/target.h =================================================================== --- gcc/target.h 2011-04-19 16:38:08.000000000 +0000 +++ gcc/target.h 2011-04-19 16:38:16.000000000 +0000 @@ -645,7 +645,10 @@ struct gcc_target /* Return true if the target supports conditional execution. */ bool (* have_conditional_execution) (void);
- /* True if the constant X cannot be placed in the constant pool. */ + /* See tm.texi. */ + bool (* legitimate_constant_p) (enum machine_mode, rtx); + + /* True if the constant X cannot be placed in the constant pool. */ bool (* cannot_force_const_mem) (rtx);
/* True if the insn X cannot be duplicated. */ Index: gcc/target-def.h =================================================================== --- gcc/target-def.h 2011-04-19 16:38:08.000000000 +0000 +++ gcc/target-def.h 2011-04-19 16:38:16.000000000 +0000 @@ -563,6 +563,7 @@ #define TARGET_BRANCH_TARGET_REGISTER_CL default_branch_target_register_class #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false #define TARGET_HAVE_CONDITIONAL_EXECUTION default_have_conditional_execution +#define TARGET_LEGITIMATE_CONSTANT_P default_legitimate_constant_p #define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false #define TARGET_CANNOT_COPY_INSN_P NULL #define TARGET_COMMUTATIVE_P hook_bool_const_rtx_commutative_p @@ -965,6 +966,7 @@ #define TARGET_INITIALIZER \ TARGET_BRANCH_TARGET_REGISTER_CLASS, \ TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED, \ TARGET_HAVE_CONDITIONAL_EXECUTION, \ + TARGET_LEGITIMATE_CONSTANT_P, \ TARGET_CANNOT_FORCE_CONST_MEM, \ TARGET_CANNOT_COPY_INSN_P, \ TARGET_COMMUTATIVE_P, \ Index: gcc/calls.c =================================================================== --- gcc/calls.c 2011-04-19 16:38:08.000000000 +0000 +++ gcc/calls.c 2011-04-19 16:38:15.000000000 +0000 @@ -674,7 +674,7 @@ precompute_register_parameters (int num_ /* If the value is a non-legitimate constant, force it into a pseudo now. TLS symbols sometimes need a call to resolve. */ if (CONSTANT_P (args[i].value) - && !LEGITIMATE_CONSTANT_P (args[i].value)) + && !targetm.legitimate_constant_p (args[i].mode, args[i].value)) args[i].value = force_reg (args[i].mode, args[i].value);
/* If we are to promote the function arg to a wider mode, @@ -3413,7 +3413,8 @@ emit_library_call_value_1 (int retval, r
/* Make sure it is a reasonable operand for a move or push insn. */ if (!REG_P (addr) && !MEM_P (addr) - && ! (CONSTANT_P (addr) && LEGITIMATE_CONSTANT_P (addr))) + && !(CONSTANT_P (addr) + && targetm.legitimate_constant_p (Pmode, addr))) addr = force_operand (addr, NULL_RTX);
argvec[count].value = addr; @@ -3453,7 +3454,7 @@ emit_library_call_value_1 (int retval, r
/* Make sure it is a reasonable operand for a move or push insn. */ if (!REG_P (val) && !MEM_P (val) - && ! (CONSTANT_P (val) && LEGITIMATE_CONSTANT_P (val))) + && !(CONSTANT_P (val) && targetm.legitimate_constant_p (mode, val))) val = force_operand (val, NULL_RTX);
if (pass_by_reference (&args_so_far, mode, NULL_TREE, 1)) Index: gcc/expr.c =================================================================== --- gcc/expr.c 2011-04-19 16:38:08.000000000 +0000 +++ gcc/expr.c 2011-04-19 16:38:16.000000000 +0000 @@ -1537,7 +1537,7 @@ move_block_to_reg (int regno, rtx x, int if (nregs == 0) return;
- if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x)) + if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x)) x = validize_mem (force_const_mem (mode, x));
/* See if the machine can do this with a load multiple insn. */ @@ -2366,7 +2366,7 @@ can_store_by_pieces (unsigned HOST_WIDE_ offset -= size;
cst = (*constfun) (constfundata, offset, mode); - if (!LEGITIMATE_CONSTANT_P (cst)) + if (!targetm.legitimate_constant_p (mode, cst)) return 0;
if (!reverse) @@ -3440,7 +3440,7 @@ emit_move_insn (rtx x, rtx y)
y_cst = y;
- if (!LEGITIMATE_CONSTANT_P (y)) + if (!targetm.legitimate_constant_p (mode, y)) { y = force_const_mem (mode, y);
@@ -3496,7 +3496,7 @@ compress_float_constant (rtx x, rtx y)
REAL_VALUE_FROM_CONST_DOUBLE (r, y);
- if (LEGITIMATE_CONSTANT_P (y)) + if (targetm.legitimate_constant_p (dstmode, y)) oldcost = rtx_cost (y, SET, speed); else oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed); @@ -3519,7 +3519,7 @@ compress_float_constant (rtx x, rtx y)
trunc_y = CONST_DOUBLE_FROM_REAL_VALUE (r, srcmode);
- if (LEGITIMATE_CONSTANT_P (trunc_y)) + if (targetm.legitimate_constant_p (srcmode, trunc_y)) { /* Skip if the target needs extra instructions to perform the extension. */ @@ -3932,7 +3932,7 @@ emit_push_insn (rtx x, enum machine_mode by setting SKIP to 0. */ skip = (reg_parm_stack_space == 0) ? 0 : not_stack;
- if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x)) + if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x)) x = validize_mem (force_const_mem (mode, x));
/* If X is a hard register in a non-integer mode, copy it into a pseudo; @@ -8951,7 +8951,7 @@ expand_expr_real_1 (tree exp, rtx target constant and we don't need a memory reference. */ if (CONSTANT_P (op0) && mode2 != BLKmode - && LEGITIMATE_CONSTANT_P (op0) + && targetm.legitimate_constant_p (mode2, op0) && !must_force_mem) op0 = force_reg (mode2, op0);
Index: gcc/recog.c =================================================================== --- gcc/recog.c 2011-04-19 16:38:08.000000000 +0000 +++ gcc/recog.c 2011-04-19 16:38:16.000000000 +0000 @@ -932,7 +932,9 @@ general_operand (rtx op, enum machine_mo return ((GET_MODE (op) == VOIDmode || GET_MODE (op) == mode || mode == VOIDmode) && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) - && LEGITIMATE_CONSTANT_P (op)); + && targetm.legitimate_constant_p (mode == VOIDmode + ? GET_MODE (op) + : mode, op));
/* Except for certain constants with VOIDmode, already checked for, OP's mode must match MODE if MODE specifies a mode. */ @@ -1109,7 +1111,9 @@ immediate_operand (rtx op, enum machine_ && (GET_MODE (op) == mode || mode == VOIDmode || GET_MODE (op) == VOIDmode) && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) - && LEGITIMATE_CONSTANT_P (op)); + && targetm.legitimate_constant_p (mode == VOIDmode + ? GET_MODE (op) + : mode, op)); }
/* Returns 1 if OP is an operand that is a CONST_INT. */ @@ -1175,7 +1179,9 @@ nonmemory_operand (rtx op, enum machine_ return ((GET_MODE (op) == VOIDmode || GET_MODE (op) == mode || mode == VOIDmode) && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) - && LEGITIMATE_CONSTANT_P (op)); + && targetm.legitimate_constant_p (mode == VOIDmode + ? GET_MODE (op) + : mode, op)); }
if (GET_MODE (op) != mode && mode != VOIDmode) Index: gcc/reload.c =================================================================== --- gcc/reload.c 2011-04-19 16:38:08.000000000 +0000 +++ gcc/reload.c 2011-04-19 16:38:16.000000000 +0000 @@ -4739,7 +4739,8 @@ find_reloads_toplev (rtx x, int opnum, e simplify_gen_subreg (GET_MODE (x), reg_equiv_constant[regno], GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); gcc_assert (tem); - if (CONSTANT_P (tem) && !LEGITIMATE_CONSTANT_P (tem)) + if (CONSTANT_P (tem) + && !targetm.legitimate_constant_p (GET_MODE (x), tem)) { tem = force_const_mem (GET_MODE (x), tem); i = find_reloads_address (GET_MODE (tem), &tem, XEXP (tem, 0), @@ -6061,7 +6062,7 @@ find_reloads_address_part (rtx x, rtx *l enum reload_type type, int ind_levels) { if (CONSTANT_P (x) - && (! LEGITIMATE_CONSTANT_P (x) + && (!targetm.legitimate_constant_p (mode, x) || PREFERRED_RELOAD_CLASS (x, rclass) == NO_REGS)) { x = force_const_mem (mode, x); @@ -6071,7 +6072,7 @@ find_reloads_address_part (rtx x, rtx *l
else if (GET_CODE (x) == PLUS && CONSTANT_P (XEXP (x, 1)) - && (! LEGITIMATE_CONSTANT_P (XEXP (x, 1)) + && (!targetm.legitimate_constant_p (GET_MODE (x), XEXP (x, 1)) || PREFERRED_RELOAD_CLASS (XEXP (x, 1), rclass) == NO_REGS)) { rtx tem; Index: gcc/reload1.c =================================================================== --- gcc/reload1.c 2011-04-19 16:38:08.000000000 +0000 +++ gcc/reload1.c 2011-04-19 16:38:16.000000000 +0000 @@ -4164,6 +4164,9 @@ init_eliminable_invariants (rtx first, b } else if (function_invariant_p (x)) { + enum machine_mode mode; + + mode = GET_MODE (SET_DEST (set)); if (GET_CODE (x) == PLUS) { /* This is PLUS of frame pointer and a constant, @@ -4176,12 +4179,11 @@ init_eliminable_invariants (rtx first, b reg_equiv_invariant[i] = x; num_eliminable_invariants++; } - else if (LEGITIMATE_CONSTANT_P (x)) + else if (targetm.legitimate_constant_p (mode, x)) reg_equiv_constant[i] = x; else { - reg_equiv_memory_loc[i] - = force_const_mem (GET_MODE (SET_DEST (set)), x); + reg_equiv_memory_loc[i] = force_const_mem (mode, x); if (! reg_equiv_memory_loc[i]) reg_equiv_init[i] = NULL_RTX; } Index: gcc/targhooks.h =================================================================== --- gcc/targhooks.h 2011-04-19 16:38:08.000000000 +0000 +++ gcc/targhooks.h 2011-04-19 16:38:16.000000000 +0000 @@ -132,3 +132,4 @@ extern bool default_addr_space_subset_p extern rtx default_addr_space_convert (rtx, tree, tree); extern unsigned int default_case_values_threshold (void); extern bool default_have_conditional_execution (void); +extern bool default_legitimate_constant_p (enum machine_mode, rtx); Index: gcc/targhooks.c =================================================================== --- gcc/targhooks.c 2011-04-19 16:38:08.000000000 +0000 +++ gcc/targhooks.c 2011-04-20 07:50:38.000000000 +0000 @@ -1008,4 +1008,15 @@ default_have_conditional_execution (void #endif }
+bool +default_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ +#ifdef LEGITIMATE_CONSTANT_P + return LEGITIMATE_CONSTANT_P (x); +#else + return true; +#endif +} + #include "gt-targhooks.h" Index: gcc/config/arm/arm-protos.h =================================================================== --- gcc/config/arm/arm-protos.h 2011-04-19 16:38:08.000000000 +0000 +++ gcc/config/arm/arm-protos.h 2011-04-19 16:38:16.000000000 +0000 @@ -81,7 +81,6 @@ extern void neon_disambiguate_copy (rtx extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, bool); extern bool arm_tls_referenced_p (rtx); -extern bool arm_cannot_force_const_mem (rtx);
extern int cirrus_memory_offset (rtx); extern int arm_coproc_mem_operand (rtx, bool); Index: gcc/config/arm/arm.h =================================================================== --- gcc/config/arm/arm.h 2011-04-19 16:38:08.000000000 +0000 +++ gcc/config/arm/arm.h 2011-04-19 16:38:16.000000000 +0000 @@ -1996,27 +1996,6 @@ #define ARM_OFFSETS_MUST_BE_WITHIN_SECTI #define TARGET_DEFAULT_WORD_RELOCATIONS 0 #endif
-/* Nonzero if the constant value X is a legitimate general operand. - It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. - - On the ARM, allow any integer (invalid ones are removed later by insn - patterns), nice doubles and symbol_refs which refer to the function's - constant pool XXX. - - When generating pic allow anything. */ -#define ARM_LEGITIMATE_CONSTANT_P(X) (flag_pic || ! label_mentioned_p (X)) - -#define THUMB_LEGITIMATE_CONSTANT_P(X) \ - ( GET_CODE (X) == CONST_INT \ - || GET_CODE (X) == CONST_DOUBLE \ - || CONSTANT_ADDRESS_P (X) \ - || flag_pic) - -#define LEGITIMATE_CONSTANT_P(X) \ - (!arm_cannot_force_const_mem (X) \ - && (TARGET_32BIT ? ARM_LEGITIMATE_CONSTANT_P (X) \ - : THUMB_LEGITIMATE_CONSTANT_P (X))) - #ifndef SUBTARGET_NAME_ENCODING_LENGTHS #define SUBTARGET_NAME_ENCODING_LENGTHS #endif Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c 2011-04-19 16:38:08.000000000 +0000 +++ gcc/config/arm/arm.c 2011-04-20 07:58:46.000000000 +0000 @@ -140,6 +140,8 @@ static void arm_internal_label (FILE *, static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); static bool arm_have_conditional_execution (void); +static bool arm_cannot_force_const_mem (enum machine_mode, rtx); +static bool arm_legitimate_constant_p (enum machine_mode, rtx); static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); static bool thumb2_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); @@ -471,6 +473,9 @@ #define TARGET_HAVE_TLS true #undef TARGET_HAVE_CONDITIONAL_EXECUTION #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p + #undef TARGET_CANNOT_FORCE_CONST_MEM #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
@@ -6451,9 +6456,41 @@ arm_tls_referenced_p (rtx x) return for_each_rtx (&x, arm_tls_operand_p_1, NULL); }
+/* Implement TARGET_LEGITIMATE_CONSTANT_P. + + On the ARM, allow any integer (invalid ones are removed later by insn + patterns), nice doubles and symbol_refs which refer to the function's + constant pool XXX. + + When generating pic allow anything. */ + +static bool +arm_legitimate_constant_p_1 (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + return flag_pic || !label_mentioned_p (x); +} + +static bool +thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + return (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE + || CONSTANT_ADDRESS_P (x) + || flag_pic); +} + +static bool +arm_legitimate_constant_p (enum machine_mode mode, rtx x) +{ + return (!arm_cannot_force_const_mem (x) + && (TARGET_32BIT + ? arm_legitimate_constant_p_1 (mode, x) + : thumb_legitimate_constant_p (mode, x))); +} + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
-bool +static bool arm_cannot_force_const_mem (rtx x) { rtx base, offset;
This patch handles moves involving Neon structure constants. It's a backport of:
http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00200.html
which Richard Earnshaw has approved, but which cannot be applied yet because it depends on patch 4/5. The patch is needed because patch 3/5 would otherwise expose new instances of PR target/46329.
Richard
gcc/
	PR target/46329
	* config/arm/arm.c (arm_legitimate_constant_p_1): Return false for
	all Neon struct constants.
gcc/testsuite/
	From Richard Earnshaw  <rearnsha@arm.com>

	PR target/46329
	* gcc.target/arm/pr46329.c: New test.
Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c 2011-04-19 16:38:16.000000000 +0000 +++ gcc/config/arm/arm.c 2011-04-20 07:54:11.000000000 +0000 @@ -140,7 +140,7 @@ static void arm_internal_label (FILE *, static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); static bool arm_have_conditional_execution (void); -static bool arm_cannot_force_const_mem (enum machine_mode, rtx); +static bool arm_cannot_force_const_mem (rtx); static bool arm_legitimate_constant_p (enum machine_mode, rtx); static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); @@ -6465,8 +6465,14 @@ arm_tls_referenced_p (rtx x) When generating pic allow anything. */
static bool -arm_legitimate_constant_p_1 (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x) { + /* At present, we have no support for Neon structure constants, so forbid + them here. It might be possible to handle simple cases like 0 and -1 + in future. */ + if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)) + return false; + return flag_pic || !label_mentioned_p (x); }
Index: gcc/testsuite/gcc.target/arm/pr46329.c =================================================================== --- /dev/null 2010-10-05 15:55:33.000000000 +0000 +++ gcc/testsuite/gcc.target/arm/pr46329.c 2011-04-19 16:38:16.000000000 +0000 @@ -0,0 +1,9 @@ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +int __attribute__ ((vector_size (32))) x; +void +foo (void) +{ + x <<= x; +}
linaro-toolchain@lists.linaro.org