The patterns for the Neon vld and vst intrinsics used the following sort of construct to refer to memory:
(mem:FOO (match_operand:SI X "register_operand" "r"))
This patch changes them to use:
(match_operand:FOO' X "neon_struct_operand" "(=)Um")
instead. This allows the loads to use post-increment addresses as well as bare registers, and also matches the form that the vec_load_lanes and vec_store_lanes optabs need. (Those optabs will be in a later autovectorisation merge.)
The patch is a backport of:
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01996.html
which has been applied to 4.7. There are three differences in the 4.5 version:
* Our 4.5 code prints alignments as "[rN, :ALIGN]" rather than "[rN:ALIGN]". I've fixed that here. The initial commit to FSF trunk used the correct form, so there isn't a separate fix that could be backported.
* 4.5 doesn't have MEM_REF, so neon_dereference_pointer uses an INDIRECT_REF instead.
* 4.5 defines the mode attributes in neon.md rather than in a separate iterators.md.
Richard
gcc/
	Backport from mainline:

	2011-04-12  Richard Sandiford  <richard.sandiford@linaro.org>

	* config/arm/arm.c (arm_print_operand): Use MEM_SIZE to get the
	size of a '%A' memory reference.
	(T_DREG, T_QREG): New neon_builtin_type_bits.
	(arm_init_neon_builtins): Assert that the load and store operands
	are neon_struct_operands.
	(locate_neon_builtin_icode): Provide the neon_builtin_type_bits.
	(NEON_ARG_MEMORY): New builtin_arg.
	(neon_dereference_pointer): New function.
	(arm_expand_neon_args): Add a neon_builtin_type_bits argument.
	Handle NEON_ARG_MEMORY.
	(arm_expand_neon_builtin): Update after above interface changes.
	Use NEON_ARG_MEMORY for loads and stores.
	* config/arm/predicates.md (neon_struct_operand): New predicate.
	* config/arm/neon.md (V_two_elem): Tweak formatting.
	(V_three_elem): Use BLKmode for accesses that have no associated mode.
	(neon_vld1<mode>, neon_vld1_lane<mode>, neon_vld1_dup<mode>)
	(neon_vst1_lane<mode>, neon_vst1<mode>, neon_vld2<mode>)
	(neon_vld2_lane<mode>, neon_vld2_dup<mode>, neon_vst2<mode>)
	(neon_vst2_lane<mode>, neon_vld3<mode>, neon_vld3_lane<mode>)
	(neon_vld3_dup<mode>, neon_vst3<mode>, neon_vst3_lane<mode>)
	(neon_vld4<mode>, neon_vld4_lane<mode>, neon_vld4_dup<mode>)
	(neon_vst4<mode>): Replace pointer operand with a memory operand.
	Use %A in the output template.
	(neon_vld3qa<mode>, neon_vld3qb<mode>, neon_vst3qa<mode>)
	(neon_vst3qb<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>)
	(neon_vst4qa<mode>, neon_vst4qb<mode>): Likewise, but halve the
	width of the memory access.  Remove post-increment.
	* config/arm/neon-testgen.ml: Allow addresses to have an alignment.
gcc/testsuite/
	Backport from mainline:

	2011-04-12  Richard Sandiford  <richard.sandiford@linaro.org>

	* gcc.target/arm/neon-vld3-1.c: New test.
	* gcc.target/arm/neon-vst3-1.c: New test.
	* gcc.target/arm/neon/v*.c: Regenerate.
Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/arm.c 2011-04-20 09:32:44.000000000 +0000 @@ -16847,7 +16847,7 @@ arm_print_operand (FILE *stream, rtx x, { rtx addr; bool postinc = FALSE; - unsigned align, modesize, align_bits; + unsigned align, memsize, align_bits;
gcc_assert (GET_CODE (x) == MEM); addr = XEXP (x, 0); @@ -16862,12 +16862,12 @@ arm_print_operand (FILE *stream, rtx x, instruction (for some alignments) as an aid to the memory subsystem of the target. */ align = MEM_ALIGN (x) >> 3; - modesize = GET_MODE_SIZE (GET_MODE (x)); + memsize = INTVAL (MEM_SIZE (x)); /* Only certain alignment specifiers are supported by the hardware. */ - if (modesize == 16 && (align % 32) == 0) + if (memsize == 16 && (align % 32) == 0) align_bits = 256; - else if ((modesize == 8 || modesize == 16) && (align % 16) == 0) + else if ((memsize == 8 || memsize == 16) && (align % 16) == 0) align_bits = 128; else if ((align % 8) == 0) align_bits = 64; @@ -16875,7 +16875,7 @@ arm_print_operand (FILE *stream, rtx x, align_bits = 0; if (align_bits != 0) - asm_fprintf (stream, ", :%d", align_bits); + asm_fprintf (stream, ":%d", align_bits);
asm_fprintf (stream, "]");
@@ -18398,12 +18398,14 @@ enum neon_builtin_type_bits { T_V2SI = 0x0004, T_V2SF = 0x0008, T_DI = 0x0010, + T_DREG = 0x001F, T_V16QI = 0x0020, T_V8HI = 0x0040, T_V4SI = 0x0080, T_V4SF = 0x0100, T_V2DI = 0x0200, T_TI = 0x0400, + T_QREG = 0x07E0, T_EI = 0x0800, T_OI = 0x1000 }; @@ -19049,10 +19051,9 @@ arm_init_neon_builtins (void) if (is_load && k == 1) { /* Neon load patterns always have the memory operand - (a SImode pointer) in the operand 1 position. We - want a const pointer to the element type in that - position. */ - gcc_assert (insn_data[icode].operand[k].mode == SImode); + in the operand 1 position. */ + gcc_assert (insn_data[icode].operand[k].predicate + == neon_struct_operand);
switch (1 << j) { @@ -19087,10 +19088,9 @@ arm_init_neon_builtins (void) else if (is_store && k == 0) { /* Similarly, Neon store patterns use operand 0 as - the memory location to store to (a SImode pointer). - Use a pointer to the element type of the store in - that position. */ - gcc_assert (insn_data[icode].operand[k].mode == SImode); + the memory location to store to. */ + gcc_assert (insn_data[icode].operand[k].predicate + == neon_struct_operand);
switch (1 << j) { @@ -19410,10 +19410,11 @@ neon_builtin_compare (const void *a, con }
static enum insn_code -locate_neon_builtin_icode (int fcode, neon_itype *itype) +locate_neon_builtin_icode (int fcode, neon_itype *itype, + enum neon_builtin_type_bits *type_bit) { neon_builtin_datum key, *found; - int idx; + int idx, type, ntypes;
key.base_fcode = fcode; found = (neon_builtin_datum *) @@ -19426,20 +19427,83 @@ locate_neon_builtin_icode (int fcode, ne if (itype) *itype = found->itype;
+ if (type_bit) + { + ntypes = 0; + for (type = 0; type < T_MAX; type++) + if (found->bits & (1 << type)) + { + if (ntypes == idx) + break; + ntypes++; + } + gcc_assert (type < T_MAX); + *type_bit = (enum neon_builtin_type_bits) (1 << type); + } return found->codes[idx]; }
typedef enum { NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_MEMORY, NEON_ARG_STOP } builtin_arg;
#define NEON_MAX_BUILTIN_ARGS 5
+/* EXP is a pointer argument to a Neon load or store intrinsic. Derive + and return an expression for the accessed memory. + + The intrinsic function operates on a block of registers that has + mode REG_MODE. This block contains vectors of type TYPE_BIT. + The function references the memory at EXP in mode MEM_MODE; + this mode may be BLKmode if no more suitable mode is available. */ + +static tree +neon_dereference_pointer (tree exp, enum machine_mode mem_mode, + enum machine_mode reg_mode, + enum neon_builtin_type_bits type_bit) +{ + HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; + tree elem_type, upper_bound, array_type; + + /* Work out the size of the register block in bytes. */ + reg_size = GET_MODE_SIZE (reg_mode); + + /* Work out the size of each vector in bytes. */ + gcc_assert (type_bit & (T_DREG | T_QREG)); + vector_size = (type_bit & T_QREG ? 16 : 8); + + /* Work out how many vectors there are. */ + gcc_assert (reg_size % vector_size == 0); + nvectors = reg_size / vector_size; + + /* Work out how many elements are being loaded or stored. + MEM_MODE == REG_MODE implies a one-to-one mapping between register + and memory elements; anything else implies a lane load or store. */ + if (mem_mode == reg_mode) + nelems = vector_size * nvectors; + else + nelems = nvectors; + + /* Work out the type of each element. */ + gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp))); + elem_type = TREE_TYPE (TREE_TYPE (exp)); + + /* Create a type that describes the full access. */ + upper_bound = build_int_cst (size_type_node, nelems - 1); + array_type = build_array_type (elem_type, build_index_type (upper_bound)); + + /* Dereference EXP using that type. */ + exp = convert (build_pointer_type (array_type), exp); + return fold_build1 (INDIRECT_REF, array_type, exp); +} + /* Expand a Neon builtin. */ static rtx arm_expand_neon_args (rtx target, int icode, int have_retval, + enum neon_builtin_type_bits type_bit, tree exp, ...) 
{ va_list ap; @@ -19448,7 +19512,9 @@ arm_expand_neon_args (rtx target, int ic rtx op[NEON_MAX_BUILTIN_ARGS]; enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; + enum machine_mode other_mode; int argc = 0; + int opno;
if (have_retval && (!target @@ -19466,26 +19532,46 @@ arm_expand_neon_args (rtx target, int ic break; else { + opno = argc + have_retval; + mode[argc] = insn_data[icode].operand[opno].mode; arg[argc] = CALL_EXPR_ARG (exp, argc); + if (thisarg == NEON_ARG_MEMORY) + { + other_mode = insn_data[icode].operand[1 - opno].mode; + arg[argc] = neon_dereference_pointer (arg[argc], mode[argc], + other_mode, type_bit); + } op[argc] = expand_normal (arg[argc]); - mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
switch (thisarg) { case NEON_ARG_COPY_TO_REG: /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ - if (!(*insn_data[icode].operand[argc + have_retval].predicate) + if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) op[argc] = copy_to_mode_reg (mode[argc], op[argc]); break;
case NEON_ARG_CONSTANT: /* FIXME: This error message is somewhat unhelpful. */ - if (!(*insn_data[icode].operand[argc + have_retval].predicate) + if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) error ("argument must be a constant"); break;
+ case NEON_ARG_MEMORY: + gcc_assert (MEM_P (op[argc])); + PUT_MODE (op[argc], mode[argc]); + /* ??? arm_neon.h uses the same built-in functions for signed + and unsigned accesses, casting where necessary. This isn't + alias safe. */ + set_mem_alias_set (op[argc], 0); + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = (replace_equiv_address + (op[argc], force_reg (Pmode, XEXP (op[argc], 0)))); + break; + case NEON_ARG_STOP: gcc_unreachable (); } @@ -19564,14 +19650,15 @@ arm_expand_neon_args (rtx target, int ic arm_expand_neon_builtin (int fcode, tree exp, rtx target) { neon_itype itype; - enum insn_code icode = locate_neon_builtin_icode (fcode, &itype); + enum neon_builtin_type_bits type_bit; + enum insn_code icode = locate_neon_builtin_icode (fcode, &itype, &type_bit);
switch (itype) { case NEON_UNOP: case NEON_CONVERT: case NEON_DUPLANE: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_BINOP: @@ -19581,90 +19668,90 @@ arm_expand_neon_builtin (int fcode, tree case NEON_SCALARMULH: case NEON_SHIFTINSERT: case NEON_LOGICBINOP: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_TERNOP: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_GETLANE: case NEON_FIXCONV: case NEON_SHIFTIMM: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_CREATE: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_DUP: case NEON_SPLIT: case NEON_REINTERP: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_COMBINE: case NEON_VTBL: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_RESULTPAIR: - return arm_expand_neon_args (target, icode, 0, exp, + return arm_expand_neon_args (target, icode, 0, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_LANEMUL: case NEON_LANEMULL: case NEON_LANEMULH: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_LANEMAC: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SHIFTACC: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SCALARMAC: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SELECT: case NEON_VTBX: - return arm_expand_neon_args (target, icode, 1, exp, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_LOAD1: case NEON_LOADSTRUCT: - return arm_expand_neon_args (target, icode, 1, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_STOP);
case NEON_LOAD1LANE: case NEON_LOADSTRUCTLANE: - return arm_expand_neon_args (target, icode, 1, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_STORE1: case NEON_STORESTRUCT: - return arm_expand_neon_args (target, icode, 0, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + return arm_expand_neon_args (target, icode, 0, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_STORE1LANE: case NEON_STORESTRUCTLANE: - return arm_expand_neon_args (target, icode, 0, exp, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + return arm_expand_neon_args (target, icode, 0, type_bit, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); }
Index: gcc/config/arm/predicates.md =================================================================== --- gcc/config/arm/predicates.md 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/predicates.md 2011-04-20 08:29:52.000000000 +0000 @@ -681,3 +681,7 @@ (define_special_predicate "vect_par_cons } return true; }) + +(define_special_predicate "neon_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) Index: gcc/config/arm/neon.md =================================================================== --- gcc/config/arm/neon.md 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/neon.md 2011-04-20 08:29:52.000000000 +0000 @@ -259,20 +259,18 @@ (define_mode_attr V_ext [(V8QI "SI") (V1
;; Mode of pair of elements for each vector mode, to define transfer ;; size for structure lane/dup loads and stores. -(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") - (V4HI "SI") (V8HI "SI") +(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") + (V4HI "SI") (V8HI "SI") (V2SI "V2SI") (V4SI "V2SI") (V2SF "V2SF") (V4SF "V2SF") (DI "V2DI") (V2DI "V2DI")])
;; Similar, for three elements. -;; ??? Should we define extra modes so that sizes of all three-element -;; accesses can be accurately represented? -(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI") - (V4HI "V4HI") (V8HI "V4HI") - (V2SI "V4SI") (V4SI "V4SI") - (V2SF "V4SF") (V4SF "V4SF") - (DI "EI") (V2DI "EI")]) +(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") + (V4HI "BLK") (V8HI "BLK") + (V2SI "BLK") (V4SI "BLK") + (V2SF "BLK") (V4SF "BLK") + (DI "EI") (V2DI "EI")])
;; Similar, for four elements. (define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") @@ -4567,16 +4565,16 @@ (define_expand "neon_vreinterpretv2di<mo
(define_insn "neon_vld1<mode>" [(set (match_operand:VDQX 0 "s_register_operand" "=w") - (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))] + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] UNSPEC_VLD1))] "TARGET_NEON" - "vld1.<V_sz_elem>\t%h0, [%1]" + "vld1.<V_sz_elem>\t%h0, %A1" [(set_attr "neon_type" "neon_vld1_1_2_regs")] )
(define_insn "neon_vld1_lane<mode>" [(set (match_operand:VDX 0 "s_register_operand" "=w") - (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") (match_operand:VDX 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VLD1_LANE))] @@ -4587,9 +4585,9 @@ (define_insn "neon_vld1_lane<mode>" if (lane < 0 || lane >= max) error ("lane out of range"); if (max == 1) - return "vld1.<V_sz_elem>\t%P0, [%1]"; + return "vld1.<V_sz_elem>\t%P0, %A1"; else - return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; + return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) @@ -4599,7 +4597,7 @@ (define_insn "neon_vld1_lane<mode>"
(define_insn "neon_vld1_lane<mode>" [(set (match_operand:VQX 0 "s_register_operand" "=w") - (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") (match_operand:VQX 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i")] UNSPEC_VLD1_LANE))] @@ -4618,9 +4616,9 @@ (define_insn "neon_vld1_lane<mode>" } operands[0] = gen_rtx_REG (<V_HALF>mode, regno); if (max == 2) - return "vld1.<V_sz_elem>\t%P0, [%1]"; + return "vld1.<V_sz_elem>\t%P0, %A1"; else - return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; + return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) @@ -4630,14 +4628,14 @@ (define_insn "neon_vld1_lane<mode>"
(define_insn "neon_vld1_dup<mode>" [(set (match_operand:VDX 0 "s_register_operand" "=w") - (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] + (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] UNSPEC_VLD1_DUP))] "TARGET_NEON" { if (GET_MODE_NUNITS (<MODE>mode) > 1) - return "vld1.<V_sz_elem>\t{%P0[]}, [%1]"; + return "vld1.<V_sz_elem>\t{%P0[]}, %A1"; else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -4647,14 +4645,14 @@ (define_insn "neon_vld1_dup<mode>"
(define_insn "neon_vld1_dup<mode>" [(set (match_operand:VQX 0 "s_register_operand" "=w") - (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] + (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] UNSPEC_VLD1_DUP))] "TARGET_NEON" { if (GET_MODE_NUNITS (<MODE>mode) > 2) - return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; + return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -4663,15 +4661,15 @@ (define_insn "neon_vld1_dup<mode>" )
(define_insn "neon_vst1<mode>" - [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] UNSPEC_VST1))] "TARGET_NEON" - "vst1.<V_sz_elem>\t%h1, [%0]" + "vst1.<V_sz_elem>\t%h1, %A0" [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
(define_insn "neon_vst1_lane<mode>" - [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") (vec_select:<V_elem> (match_operand:VDX 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] @@ -4682,9 +4680,9 @@ (define_insn "neon_vst1_lane<mode>" if (lane < 0 || lane >= max) error ("lane out of range"); if (max == 1) - return "vst1.<V_sz_elem>\t{%P1}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1}, %A0"; else - return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1)) @@ -4692,7 +4690,7 @@ (define_insn "neon_vst1_lane<mode>" (const_string "neon_vst1_vst2_lane")))])
(define_insn "neon_vst1_lane<mode>" - [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") (vec_select:<V_elem> (match_operand:VQX 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] @@ -4711,24 +4709,24 @@ (define_insn "neon_vst1_lane<mode>" } operands[1] = gen_rtx_REG (<V_HALF>mode, regno); if (max == 2) - return "vst1.<V_sz_elem>\t{%P1}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1}, %A0"; else - return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; + return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; } [(set_attr "neon_type" "neon_vst1_vst2_lane")] )
(define_insn "neon_vld2<mode>" [(set (match_operand:TI 0 "s_register_operand" "=w") - (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vld1.64\t%h0, [%1]"; + return "vld1.64\t%h0, %A1"; else - return "vld2.<V_sz_elem>\t%h0, [%1]"; + return "vld2.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -4738,16 +4736,16 @@ (define_insn "neon_vld2<mode>"
(define_insn "neon_vld2<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON" - "vld2.<V_sz_elem>\t%h0, [%1]" + "vld2.<V_sz_elem>\t%h0, %A1" [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")])
(define_insn "neon_vld2_lane<mode>" [(set (match_operand:TI 0 "s_register_operand" "=w") - (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") (match_operand:TI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -4764,7 +4762,7 @@ (define_insn "neon_vld2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = operands[1]; ops[3] = operands[3]; - output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); + output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } [(set_attr "neon_type" "neon_vld1_vld2_lane")] @@ -4772,7 +4770,7 @@ (define_insn "neon_vld2_lane<mode>"
(define_insn "neon_vld2_lane<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -4794,7 +4792,7 @@ (define_insn "neon_vld2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno + 4); ops[2] = operands[1]; ops[3] = GEN_INT (lane); - output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); + output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } [(set_attr "neon_type" "neon_vld1_vld2_lane")] @@ -4802,15 +4800,15 @@ (define_insn "neon_vld2_lane<mode>"
(define_insn "neon_vld2_dup<mode>" [(set (match_operand:TI 0 "s_register_operand" "=w") - (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2_DUP))] "TARGET_NEON" { if (GET_MODE_NUNITS (<MODE>mode) > 1) - return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; + return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -4819,16 +4817,16 @@ (define_insn "neon_vld2_dup<mode>" )
(define_insn "neon_vst2<mode>" - [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vst1.64\t%h1, [%0]"; + return "vst1.64\t%h1, %A0"; else - return "vst2.<V_sz_elem>\t%h1, [%0]"; + return "vst2.<V_sz_elem>\t%h1, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -4837,17 +4835,17 @@ (define_insn "neon_vst2<mode>" )
(define_insn "neon_vst2<mode>" - [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON" - "vst2.<V_sz_elem>\t%h1, [%0]" + "vst2.<V_sz_elem>\t%h1, %A0" [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] )
(define_insn "neon_vst2_lane<mode>" - [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_two_elem> [(match_operand:TI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -4865,14 +4863,14 @@ (define_insn "neon_vst2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 2); ops[3] = operands[2]; - output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); + output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst1_vst2_lane")] )
(define_insn "neon_vst2_lane<mode>" - [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_two_elem> [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -4895,7 +4893,7 @@ (define_insn "neon_vst2_lane<mode>" ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = GEN_INT (lane); - output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); + output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst1_vst2_lane")] @@ -4903,15 +4901,15 @@ (define_insn "neon_vst2_lane<mode>"
(define_insn "neon_vld3<mode>" [(set (match_operand:EI 0 "s_register_operand" "=w") - (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vld1.64\t%h0, [%1]"; + return "vld1.64\t%h0, %A1"; else - return "vld3.<V_sz_elem>\t%h0, [%1]"; + return "vld3.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -4920,25 +4918,25 @@ (define_insn "neon_vld3<mode>" )
(define_expand "neon_vld3<mode>" - [(match_operand:CI 0 "s_register_operand" "=w") - (match_operand:SI 1 "s_register_operand" "+r") + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[1], operands[1])); - emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0], - operands[1], operands[1])); + rtx mem; + + mem = adjust_address (operands[1], EImode, 0); + emit_insn (gen_neon_vld3qa<mode> (operands[0], mem)); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0])); DONE; })
(define_insn "neon_vld3qa<mode>" [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(mem:CI (match_operand:SI 2 "s_register_operand" "1")) + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD3A)) - (set (match_operand:SI 1 "s_register_operand" "=r") - (plus:SI (match_dup 2) - (const_int 24)))] + UNSPEC_VLD3A))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -4947,7 +4945,7 @@ (define_insn "neon_vld3qa<mode>" ops[1] = gen_rtx_REG (DImode, regno + 4); ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = operands[1]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); + output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -4955,13 +4953,10 @@ (define_insn "neon_vld3qa<mode>"
(define_insn "neon_vld3qb<mode>" [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:CI 1 "s_register_operand" "0") + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD3B)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) - (const_int 24)))] + UNSPEC_VLD3B))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -4969,8 +4964,8 @@ (define_insn "neon_vld3qb<mode>" ops[0] = gen_rtx_REG (DImode, regno + 2); ops[1] = gen_rtx_REG (DImode, regno + 6); ops[2] = gen_rtx_REG (DImode, regno + 10); - ops[3] = operands[2]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); + ops[3] = operands[1]; + output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -4978,7 +4973,7 @@ (define_insn "neon_vld3qb<mode>"
(define_insn "neon_vld3_lane<mode>" [(set (match_operand:EI 0 "s_register_operand" "=w") - (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") (match_operand:EI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -4996,7 +4991,7 @@ (define_insn "neon_vld3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = operands[1]; ops[4] = operands[3]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", ops); return ""; } @@ -5005,7 +5000,7 @@ (define_insn "neon_vld3_lane<mode>"
(define_insn "neon_vld3_lane<mode>" [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") (match_operand:CI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -5028,7 +5023,7 @@ (define_insn "neon_vld3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = operands[1]; ops[4] = GEN_INT (lane); - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", ops); return ""; } @@ -5037,7 +5032,7 @@ (define_insn "neon_vld3_lane<mode>"
(define_insn "neon_vld3_dup<mode>" [(set (match_operand:EI 0 "s_register_operand" "=w") - (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3_DUP))] "TARGET_NEON" @@ -5050,11 +5045,11 @@ (define_insn "neon_vld3_dup<mode>" ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = operands[1]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, [%3]", ops); + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %A3", ops); return ""; } else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -5062,16 +5057,16 @@ (define_insn "neon_vld3_dup<mode>" (const_string "neon_vld1_1_2_regs")))])
(define_insn "neon_vst3<mode>" - [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vst1.64\t%h1, [%0]"; + return "vst1.64\t%h1, %A0"; else - return "vst3.<V_sz_elem>\t%h1, [%0]"; + return "vst3.<V_sz_elem>\t%h1, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -5079,62 +5074,60 @@ (define_insn "neon_vst3<mode>" (const_string "neon_vst2_4_regs_vst3_vst4")))])
(define_expand "neon_vst3<mode>" - [(match_operand:SI 0 "s_register_operand" "+r") - (match_operand:CI 1 "s_register_operand" "w") + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vst3qa<mode> (operands[0], operands[0], operands[1])); - emit_insn (gen_neon_vst3qb<mode> (operands[0], operands[0], operands[1])); + rtx mem; + + mem = adjust_address (operands[0], EImode, 0); + emit_insn (gen_neon_vst3qa<mode> (mem, operands[1])); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vst3qb<mode> (mem, operands[1])); DONE; })
(define_insn "neon_vst3qa<mode>" - [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST3A)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 24)))] + UNSPEC_VST3A))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[4]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 8); - output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); + output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst3qb<mode>" - [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST3B)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 24)))] + UNSPEC_VST3B))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[4]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 6); ops[3] = gen_rtx_REG (DImode, regno + 10); - output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); + output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst3_lane<mode>" - [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_three_elem> [(match_operand:EI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5153,7 +5146,7 @@ (define_insn "neon_vst3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 2); ops[3] = gen_rtx_REG (DImode, regno + 4); ops[4] = operands[2]; - output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", + output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", ops); return ""; } @@ -5161,7 +5154,7 @@ (define_insn "neon_vst3_lane<mode>" )
(define_insn "neon_vst3_lane<mode>" - [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_three_elem> [(match_operand:CI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5185,7 +5178,7 @@ (define_insn "neon_vst3_lane<mode>" ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 8); ops[4] = GEN_INT (lane); - output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", + output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", ops); return ""; } @@ -5193,15 +5186,15 @@ (define_insn "neon_vst3_lane<mode>"
(define_insn "neon_vld4<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vld1.64\t%h0, [%1]"; + return "vld1.64\t%h0, %A1"; else - return "vld4.<V_sz_elem>\t%h0, [%1]"; + return "vld4.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -5210,25 +5203,25 @@ (define_insn "neon_vld4<mode>" )
(define_expand "neon_vld4<mode>" - [(match_operand:XI 0 "s_register_operand" "=w") - (match_operand:SI 1 "s_register_operand" "+r") + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[1], operands[1])); - emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0], - operands[1], operands[1])); + rtx mem; + + mem = adjust_address (operands[1], OImode, 0); + emit_insn (gen_neon_vld4qa<mode> (operands[0], mem)); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0])); DONE; })
(define_insn "neon_vld4qa<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:XI (match_operand:SI 2 "s_register_operand" "1")) + (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD4A)) - (set (match_operand:SI 1 "s_register_operand" "=r") - (plus:SI (match_dup 2) - (const_int 32)))] + UNSPEC_VLD4A))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -5238,7 +5231,7 @@ (define_insn "neon_vld4qa<mode>" ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = gen_rtx_REG (DImode, regno + 12); ops[4] = operands[1]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); + output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -5246,13 +5239,10 @@ (define_insn "neon_vld4qa<mode>"
(define_insn "neon_vld4qb<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:XI 1 "s_register_operand" "0") + (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD4B)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) - (const_int 32)))] + UNSPEC_VLD4B))] "TARGET_NEON" { int regno = REGNO (operands[0]); @@ -5261,8 +5251,8 @@ (define_insn "neon_vld4qb<mode>" ops[1] = gen_rtx_REG (DImode, regno + 6); ops[2] = gen_rtx_REG (DImode, regno + 10); ops[3] = gen_rtx_REG (DImode, regno + 14); - ops[4] = operands[2]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); + ops[4] = operands[1]; + output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } [(set_attr "neon_type" "neon_vld3_vld4")] @@ -5270,7 +5260,7 @@ (define_insn "neon_vld4qb<mode>"
(define_insn "neon_vld4_lane<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -5289,7 +5279,7 @@ (define_insn "neon_vld4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 6); ops[4] = operands[1]; ops[5] = operands[3]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", + output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", ops); return ""; } @@ -5298,7 +5288,7 @@ (define_insn "neon_vld4_lane<mode>"
(define_insn "neon_vld4_lane<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") (match_operand:XI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] @@ -5322,7 +5312,7 @@ (define_insn "neon_vld4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 12); ops[4] = operands[1]; ops[5] = GEN_INT (lane); - output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", + output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", ops); return ""; } @@ -5331,7 +5321,7 @@ (define_insn "neon_vld4_lane<mode>"
(define_insn "neon_vld4_dup<mode>" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) + (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4_DUP))] "TARGET_NEON" @@ -5345,12 +5335,12 @@ (define_insn "neon_vld4_dup<mode>" ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 6); ops[4] = operands[1]; - output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, [%4]", + output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops); return ""; } else - return "vld1.<V_sz_elem>\t%h0, [%1]"; + return "vld1.<V_sz_elem>\t%h0, %A1"; } [(set (attr "neon_type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) @@ -5359,16 +5349,16 @@ (define_insn "neon_vld4_dup<mode>" )
(define_insn "neon_vst4<mode>" - [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4))] "TARGET_NEON" { if (<V_sz_elem> == 64) - return "vst1.64\t%h1, [%0]"; + return "vst1.64\t%h1, %A0"; else - return "vst4.<V_sz_elem>\t%h1, [%0]"; + return "vst4.<V_sz_elem>\t%h1, %A0"; } [(set (attr "neon_type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) @@ -5377,64 +5367,62 @@ (define_insn "neon_vst4<mode>" )
(define_expand "neon_vst4<mode>" - [(match_operand:SI 0 "s_register_operand" "+r") - (match_operand:XI 1 "s_register_operand" "w") + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vst4qa<mode> (operands[0], operands[0], operands[1])); - emit_insn (gen_neon_vst4qb<mode> (operands[0], operands[0], operands[1])); + rtx mem; + + mem = adjust_address (operands[0], OImode, 0); + emit_insn (gen_neon_vst4qa<mode> (mem, operands[1])); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vst4qb<mode> (mem, operands[1])); DONE; })
(define_insn "neon_vst4qa<mode>" - [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST4A)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 32)))] + UNSPEC_VST4A))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[5]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 8); ops[4] = gen_rtx_REG (DImode, regno + 12); - output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); + output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst4qb<mode>" - [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) - (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST4B)) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (match_dup 1) - (const_int 32)))] + UNSPEC_VST4B))] "TARGET_NEON" { - int regno = REGNO (operands[2]); + int regno = REGNO (operands[1]); rtx ops[5]; ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 6); ops[3] = gen_rtx_REG (DImode, regno + 10); ops[4] = gen_rtx_REG (DImode, regno + 14); - output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); + output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] )
(define_insn "neon_vst4_lane<mode>" - [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_four_elem> [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5454,7 +5442,7 @@ (define_insn "neon_vst4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 4); ops[4] = gen_rtx_REG (DImode, regno + 6); ops[5] = operands[2]; - output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", + output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", ops); return ""; } @@ -5462,7 +5450,7 @@ (define_insn "neon_vst4_lane<mode>" )
(define_insn "neon_vst4_lane<mode>" - [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r")) + [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") (unspec:<V_four_elem> [(match_operand:XI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") @@ -5487,7 +5475,7 @@ (define_insn "neon_vst4_lane<mode>" ops[3] = gen_rtx_REG (DImode, regno + 8); ops[4] = gen_rtx_REG (DImode, regno + 12); ops[5] = GEN_INT (lane); - output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", + output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", ops); return ""; } Index: gcc/config/arm/neon-testgen.ml =================================================================== --- gcc/config/arm/neon-testgen.ml 2011-04-20 08:29:44.000000000 +0000 +++ gcc/config/arm/neon-testgen.ml 2011-04-20 08:29:52.000000000 +0000 @@ -177,7 +177,7 @@ let rec analyze_shape shape = let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in "\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\}" | (PtrTo elt | CstPtrTo elt) -> - "\\\[" ^ (analyze_shape_elt elt) ^ "\\\]" + "\\\[" ^ (analyze_shape_elt elt) ^ "\(:\[0-9\]+\)?\\\]" | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\[\[0-9\]+\\\]" | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\[\[0-9\]+\\\]" | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\[\\\]" Index: gcc/testsuite/gcc.target/arm/neon-vld3-1.c =================================================================== --- /dev/null 2010-10-05 15:55:33.000000000 +0000 +++ gcc/testsuite/gcc.target/arm/neon-vld3-1.c 2011-04-20 08:29:52.000000000 +0000 @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +uint32_t buffer[12]; + +void __attribute__((noinline)) +foo (uint32_t *a) +{ + uint32x4x3_t x; + + x = vld3q_u32 (a); + x.val[0] = vaddq_u32 (x.val[0], x.val[1]); + vst3q_u32 (a, x); 
+}
+
+int
+main (void)
+{
+  buffer[0] = 1;
+  buffer[1] = 2;
+  foo (buffer);
+  return buffer[0] != 3;
+}
Index: gcc/testsuite/gcc.target/arm/neon-vst3-1.c
===================================================================
--- /dev/null 2010-10-05 15:55:33.000000000 +0000
+++ gcc/testsuite/gcc.target/arm/neon-vst3-1.c 2011-04-20 08:29:52.000000000 +0000
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+uint32_t buffer[64];
+
+void __attribute__((noinline))
+foo (uint32_t *a)
+{
+  uint32x4x3_t x;
+
+  x = vld3q_u32 (a);
+  a[35] = 1;
+  vst3q_lane_u32 (a + 32, x, 1);
+}
+
+int
+main (void)
+{
+  foo (buffer);
+  return buffer[35] != 1;
+}
Index: gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c
===================================================================
--- gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-04-20 08:29:45.000000000 +0000
+++ gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-04-20 08:29:52.000000000 +0000
@@ -15,5 +15,5 @@ void test_vld1Q_dupf32 (void)
   out_float32x4_t = vld1q_dup_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1.32[ ]+\{(([dD][0-9]+\[\]-[dD][0-9]+\[\])|([dD][0-9]+\[\], [dD][0-9]+\[\]))\}, \[[rR][0-9]+\]!?([ ]+@.*)?\n" } } */
+/* { dg-final { scan-assembler "vld1.32[ ]+\{(([dD][0-9]+\[\]-[dD][0-9]+\[\])|([dD][0-9]+\[\], [dD][0-9]+\[\]))\}, \[[rR][0-9]+(:[0-9]+)?\]!?([ ]+@.*)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
[...etc...]