diff -u gcc-5-5.2.1/debian/changelog gcc-5-5.2.1/debian/changelog --- gcc-5-5.2.1/debian/changelog +++ gcc-5-5.2.1/debian/changelog @@ -1,9 +1,45 @@ -gcc-5 (5.2.1-22ubuntu3) wily; urgency=medium +gcc-5 (5.2.1-23ubuntu1~15.10) wily; urgency=medium - * Update to SVN 20151020 (r229103, 5.2.1) from the gcc-5-branch. + * Test build, using the Linaro support on armhf and arm64. + + -- Matthias Klose Fri, 30 Oct 2015 19:18:04 +0200 + +gcc-5 (5.2.1-23ubuntu1) xenial; urgency=medium + + * Again, configure with --enable-targets=powerpcle-linux on ppc64el. + + -- Matthias Klose Wed, 28 Oct 2015 12:18:49 +0100 + +gcc-5 (5.2.1-23) unstable; urgency=medium + + * Update to SVN 20151028 (r229478, 5.2.1) from the gcc-5-branch. + + [ Matthias Klose ] + * Update the Linaro support to the 5-2015.10 snapshot. + * gcj: On ppc64el, use the same jvm archdir name as for openjdk (ppc64le). + * gcj: Fix priority of java alternatives. Closes: #803055. + * gnat-5: Reintroduce the unversioned gnatgcc name. Closes: #802838. + + [ Aurelien Jarno ] + * Replace proposed patch for PR rtl-optimization/67736 by the one + committed on trunk. + + -- Matthias Klose Wed, 28 Oct 2015 10:36:54 +0100 + +gcc-5 (5.2.1-22ubuntu5) xenial; urgency=medium + + * Revert the fix for PR ipa/67056, causing an ICE. + + -- Matthias Klose Fri, 23 Oct 2015 19:13:51 +0200 + +gcc-5 (5.2.1-22ubuntu4) xenial; urgency=medium + + * Update to SVN 20151022 (r229176, 5.2.1) from the gcc-5-branch. + * Fix PR ipa/67056, taken from the trunk. Closes: #788299. * Target POWER8 on ppc64el. + * Again, don't strip the compiler binaries for more verbose ICEs. - -- Matthias Klose Wed, 21 Oct 2015 03:36:24 +0200 + -- Matthias Klose Thu, 22 Oct 2015 17:32:47 +0200 gcc-5 (5.2.1-22ubuntu2) wily; urgency=medium diff -u gcc-5-5.2.1/debian/control gcc-5-5.2.1/debian/control --- gcc-5-5.2.1/debian/control +++ gcc-5-5.2.1/debian/control @@ -13,7 +13,7 @@ libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], autogen, gawk, lzma, xz-utils, patchutils, zlib1g-dev, systemtap-sdt-dev [linux-any kfreebsd-any hurd-any], - binutils (>= 2.25.51.20151020-1~) | binutils-multiarch (>= 2.25.51.20151020-1~), binutils-hppa64-linux-gnu (>= 2.25.51.20151020-1~) [hppa], + binutils (>= 2.22) | binutils-multiarch (>= 2.22), binutils-hppa64-linux-gnu (>= 2.22) [hppa], gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, gdb, texinfo (>= 4.3), locales, sharutils, diff -u gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff --- gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff +++ gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff @@ -1,4 +1,4 @@ -# DP: Changes for the Linaro 5-2015.09 release (documentation). +# DP: Changes for the Linaro 5-2015.10 release (documentation). --- a/src/gcc/doc/extend.texi +++ b/src/gcc/doc/extend.texi @@ -715,7 +715,7 @@ The @code{ifunc} attribute is used to mark a function as an indirect function using the STT_GNU_IFUNC symbol type extension to the ELF standard. This allows the resolution of the symbol value to be -@@ -2883,254 +2715,780 @@ Indirect functions cannot be weak. Binutils version 2.20.1 or higher +@@ -2883,324 +2715,585 @@ Indirect functions cannot be weak. Binutils version 2.20.1 or higher and GNU C Library version 2.11.1 are required to use this feature. @item interrupt @@ -1057,7 +1057,13 @@ +@smallexample +@group +void fatal () __attribute__ ((noreturn)); -+ + +-On RX targets, you may specify one or more vector numbers as arguments +-to the attribute, as well as naming an alternate table name. 
+-Parameters are handled sequentially, so one handler can be assigned to +-multiple entries in multiple tables. One may also pass the magic +-string @code{"$default"} which causes the function to be used for any +-unfilled slots in the current table. +void +fatal (/* @r{@dots{}} */) +@{ @@ -1118,11 +1124,18 @@ +to common subexpression elimination and loop optimization just as an +arithmetic operator would be. These functions should be declared +with the attribute @code{pure}. For example, -+ -+@smallexample + +-This example shows a simple assignment of a function to one vector in +-the default table (note that preprocessor macros may be used for +-chip-specific symbolic vector names): + @smallexample +-void __attribute__ ((interrupt (5))) txd1_handler (); +int square (int) __attribute__ ((pure)); -+@end smallexample -+ + @end smallexample + +-This example assigns a function to two slots in the default table +-(using preprocessor macros defined elsewhere) and makes it the default +-for the @code{dct} table: +@noindent +says that the hypothetical function @code{square} is safe to call +fewer times than the program says. @@ -1137,15 +1150,33 @@ +The @code{returns_nonnull} attribute specifies that the function +return value should be a non-null pointer. For instance, the declaration: + -+@smallexample + @smallexample +-void __attribute__ ((interrupt (RXD1_VECT,RXD2_VECT,"dct","$default"))) +- txd1_handler (); +extern void * +mymalloc (size_t len) __attribute__((returns_nonnull)); -+@end smallexample -+ + @end smallexample + +-@item interrupt_handler +-@cindex @code{interrupt_handler} function attribute, Blackfin +-@cindex @code{interrupt_handler} function attribute, m68k +-@cindex @code{interrupt_handler} function attribute, H8/300 +-@cindex @code{interrupt_handler} function attribute, SH +-Use this attribute on the Blackfin, m68k, H8/300, H8/300H, H8S, and SH to +-indicate that the specified function is an interrupt handler. The compiler +-generates function entry and exit sequences suitable for use in an +-interrupt handler when this attribute is present. +@noindent +lets the compiler optimize callers based on the knowledge +that the return value will never be null. -+ + +-@item interrupt_thread +-@cindex @code{interrupt_thread} function attribute, fido +-Use this attribute on fido, a subarchitecture of the m68k, to indicate +-that the specified function is an interrupt handler that is designed +-to run as a thread. The compiler omits generate prologue/epilogue +-sequences and replaces the return instruction with a @code{sleep} +-instruction. This attribute is available only on fido. +@item returns_twice +@cindex @code{returns_twice} function attribute +@cindex functions that return more than once @@ -1156,7 +1187,11 @@ +function. Examples of such functions are @code{setjmp} and @code{vfork}. +The @code{longjmp}-like counterpart of such function, if any, might need +to be marked with the @code{noreturn} attribute. -+ + +-@item isr +-@cindex @code{isr} function attribute, ARM +-Use this attribute on ARM to write Interrupt Service Routines. This is an +-alias to the @code{interrupt} attribute above. +@item section ("@var{section-name}") +@cindex @code{section} function attribute +@cindex functions in arbitrary sections @@ -1165,19 +1200,46 @@ +particular functions to appear in special sections. The @code{section} +attribute specifies that a function lives in a particular section. 
+For example, the declaration: -+ + +-@item kspisusp +-@cindex @code{kspisusp} function attribute, Blackfin +-@cindex User stack pointer in interrupts on the Blackfin +-When used together with @code{interrupt_handler}, @code{exception_handler} +-or @code{nmi_handler}, code is generated to load the stack pointer +-from the USP register in the function prologue. +@smallexample +extern void foobar (void) __attribute__ ((section ("bar"))); +@end smallexample -+ + +-@item l1_text +-@cindex @code{l1_text} function attribute, Blackfin +-This attribute specifies a function to be placed into L1 Instruction +-SRAM@. The function is put into a specific section named @code{.l1.text}. +-With @option{-mfdpic}, function calls with a such function as the callee +-or caller uses inlined PLT. +@noindent +puts the function @code{foobar} in the @code{bar} section. -+ + +-@item l2 +-@cindex @code{l2} function attribute, Blackfin +-On the Blackfin, this attribute specifies a function to be placed into L2 +-SRAM. The function is put into a specific section named +-@code{.l1.text}. With @option{-mfdpic}, callers of such functions use +-an inlined PLT. +Some file formats do not support arbitrary sections so the @code{section} +attribute is not available on all platforms. +If you need to map the entire contents of a module to a particular +section, consider using the facilities of the linker instead. -+ + +-@item leaf +-@cindex @code{leaf} function attribute +-Calls to external functions with this attribute must return to the current +-compilation unit only by return or by exception handling. In particular, leaf +-functions are not allowed to call callback function passed to it from the current +-compilation unit or directly call functions exported by the unit or longjmp +-into the unit. Leaf function might still call functions from other compilation +-units and thus they are not necessarily leaf in the sense that they contain no +-function calls at all. +@item sentinel +@cindex @code{sentinel} function attribute +This function attribute ensures that a parameter in a function call is @@ -1186,17 +1248,29 @@ +last parameter of the function call. If an optional integer position +argument P is supplied to the attribute, the sentinel must be located at +position P counting backwards from the end of the argument list. -+ + +-The attribute is intended for library functions to improve dataflow analysis. +-The compiler takes the hint that any data not escaping the current compilation unit can +-not be used or modified by the leaf function. For example, the @code{sin} function +-is a leaf function, but @code{qsort} is not. +@smallexample +__attribute__ ((sentinel)) +is equivalent to +__attribute__ ((sentinel(0))) +@end smallexample -+ + +-Note that leaf functions might invoke signals and signal handlers might be +-defined in the current compilation unit and use static variables. The only +-compliant way to write such a signal handler is to declare such variables +-@code{volatile}. +The attribute is automatically set with a position of 0 for the built-in +functions @code{execl} and @code{execlp}. The built-in function +@code{execle} has the attribute set with a position of 1. -+ + +-The attribute has no effect on functions defined within the current compilation +-unit. This is to allow easy merging of multiple compilation units into one, +-for example, by using the link-time optimization. For this reason the +-attribute is not allowed on types to annotate indirect calls. 
+A valid @code{NULL} in this context is defined as zero with any pointer +type. If your system defines the @code{NULL} macro with an integer type +then you need to add an explicit cast. GCC replaces @code{stddef.h} @@ -1244,8 +1318,9 @@ +or separate the options with a comma (@samp{,}) within a single string. + +The options supported are specific to each target; refer to @ref{x86 -+Function Attributes}, @ref{PowerPC Function Attributes}, and -+@ref{Nios II Function Attributes}, for details. ++Function Attributes}, @ref{PowerPC Function Attributes}, ++@ref{ARM Function Attributes},and @ref{Nios II Function Attributes}, ++for details. + +@item unused +@cindex @code{unused} function attribute @@ -1383,13 +1458,7 @@ + return 0; +@} +@end smallexample - --On RX targets, you may specify one or more vector numbers as arguments --to the attribute, as well as naming an alternate table name. --Parameters are handled sequentially, so one handler can be assigned to --multiple entries in multiple tables. One may also pass the magic --string @code{"$default"} which causes the function to be used for any --unfilled slots in the current table. ++ +@noindent +results in warning on line 5. + @@ -1412,23 +1481,16 @@ +implicitly marks the declaration as @code{weak}. Without a +@var{target}, given as an argument to @code{weakref} or to @code{alias}, +@code{weakref} is equivalent to @code{weak}. - --This example shows a simple assignment of a function to one vector in --the default table (note that preprocessor macros may be used for --chip-specific symbolic vector names): - @smallexample --void __attribute__ ((interrupt (5))) txd1_handler (); ++ ++@smallexample +static int x() __attribute__ ((weakref ("y"))); +/* is equivalent to... */ +static int x() __attribute__ ((weak, weakref, alias ("y"))); +/* and to... */ +static int x() __attribute__ ((weakref)); +static int x() __attribute__ ((alias ("y"))); - @end smallexample - --This example assigns a function to two slots in the default table --(using preprocessor macros defined elsewhere) and makes it the default --for the @code{dct} table: ++@end smallexample ++ +A weak reference is an alias that does not by itself require a +definition to be given for the target symbol. If the target symbol is +only referenced through weak references, then it becomes a @code{weak} @@ -1465,44 +1527,104 @@ +On the ARC, you must specify the kind of interrupt to be handled +in a parameter to the interrupt attribute like this: + - @smallexample --void __attribute__ ((interrupt (RXD1_VECT,RXD2_VECT,"dct","$default"))) -- txd1_handler (); ++@smallexample +void f () __attribute__ ((interrupt ("ilink1"))); +@end smallexample + +Permissible values for this parameter are: @w{@code{ilink1}} and +@w{@code{ilink2}}. 
-+ -+@item long_call -+@itemx medium_call -+@itemx short_call -+@cindex @code{long_call} function attribute, ARC -+@cindex @code{medium_call} function attribute, ARC -+@cindex @code{short_call} function attribute, ARC -+@cindex indirect calls, ARC + + @item long_call + @itemx medium_call + @itemx short_call + @cindex @code{long_call} function attribute, ARC +-@cindex @code{long_call} function attribute, ARM +-@cindex @code{long_call} function attribute, Epiphany + @cindex @code{medium_call} function attribute, ARC + @cindex @code{short_call} function attribute, ARC +-@cindex @code{short_call} function attribute, ARM +-@cindex @code{short_call} function attribute, Epiphany + @cindex indirect calls, ARC +-@cindex indirect calls, ARM +-@cindex indirect calls, Epiphany +-These attributes specify how a particular function is called on +-ARC, ARM and Epiphany - with @code{medium_call} being specific to ARC. +These attributes specify how a particular function is called. -+These attributes override the + These attributes override the +-@option{-mlong-calls} (@pxref{ARM Options} and @ref{ARC Options}) +-and @option{-mmedium-calls} (@pxref{ARC Options}) +-command-line switches and @code{#pragma long_calls} settings. For ARM, the +-@code{long_call} attribute indicates that the function might be far +-away from the call site and require a different (more expensive) +-calling sequence. The @code{short_call} attribute always places +-the offset to the function from the call site into the @samp{BL} +-instruction directly. +@option{-mlong-calls} and @option{-mmedium-calls} (@pxref{ARC Options}) +command-line switches and @code{#pragma long_calls} settings. -+ -+For ARC, a function marked with the @code{long_call} attribute is -+always called using register-indirect jump-and-link instructions, -+thereby enabling the called function to be placed anywhere within the -+32-bit address space. A function marked with the @code{medium_call} -+attribute will always be close enough to be called with an unconditional -+branch-and-link instruction, which has a 25-bit offset from -+the call site. A function marked with the @code{short_call} -+attribute will always be close enough to be called with a conditional -+branch-and-link instruction, which has a 21-bit offset from -+the call site. + + For ARC, a function marked with the @code{long_call} attribute is + always called using register-indirect jump-and-link instructions, +@@ -3212,179 +3305,134 @@ the call site. A function marked with the @code{short_call} + attribute will always be close enough to be called with a conditional + branch-and-link instruction, which has a 21-bit offset from + the call site. +@end table -+ + +-@item longcall +-@itemx shortcall +-@cindex indirect calls, Blackfin +-@cindex indirect calls, PowerPC +-@cindex @code{longcall} function attribute, Blackfin +-@cindex @code{longcall} function attribute, PowerPC +-@cindex @code{shortcall} function attribute, Blackfin +-@cindex @code{shortcall} function attribute, PowerPC +-On Blackfin and PowerPC, the @code{longcall} attribute +-indicates that the function might be far away from the call site and +-require a different (more expensive) calling sequence. The +-@code{shortcall} attribute indicates that the function is always close +-enough for the shorter calling sequence to be used. These attributes +-override both the @option{-mlongcall} switch and, on the RS/6000 and +-PowerPC, the @code{#pragma longcall} setting. 
+- +-@xref{RS/6000 and PowerPC Options}, for more information on whether long +-calls are necessary. +- +-@item long_call +-@itemx near +-@itemx far +-@cindex indirect calls, MIPS +-@cindex @code{long_call} function attribute, MIPS +-@cindex @code{near} function attribute, MIPS +-@cindex @code{far} function attribute, MIPS +-These attributes specify how a particular function is called on MIPS@. +-The attributes override the @option{-mlong-calls} (@pxref{MIPS Options}) +-command-line switch. The @code{long_call} and @code{far} attributes are +-synonyms, and cause the compiler to always call +-the function by first loading its address into a register, and then using +-the contents of that register. The @code{near} attribute has the opposite +-effect; it specifies that non-PIC calls should be made using the more +-efficient @code{jal} instruction. +- +-@item malloc +-@cindex @code{malloc} function attribute +-This tells the compiler that a function is @code{malloc}-like, i.e., +-that the pointer @var{P} returned by the function cannot alias any +-other pointer valid when the function returns, and moreover no +-pointers to valid objects occur in any storage addressed by @var{P}. +@node ARM Function Attributes +@subsection ARM Function Attributes -+ + +-Using this attribute can improve optimization. Functions like +-@code{malloc} and @code{calloc} have this property because they return +-a pointer to uninitialized or zeroed-out storage. However, functions +-like @code{realloc} do not have this property, as they can return a +-pointer to storage containing pointers. +These function attributes are supported for ARM targets: -+ + +-@item mips16 +-@itemx nomips16 +-@cindex @code{mips16} function attribute, MIPS +-@cindex @code{nomips16} function attribute, MIPS +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, ARM @@ -1510,26 +1632,58 @@ +that the specified function is an interrupt handler. The compiler generates +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. -+ + +-On MIPS targets, you can use the @code{mips16} and @code{nomips16} +-function attributes to locally select or turn off MIPS16 code generation. +-A function with the @code{mips16} attribute is emitted as MIPS16 code, +-while MIPS16 code generation is disabled for functions with the +-@code{nomips16} attribute. These attributes override the +-@option{-mips16} and @option{-mno-mips16} options on the command line +-(@pxref{MIPS Options}). +You can specify the kind of interrupt to be handled by +adding an optional parameter to the interrupt attribute like this: -+ + +-When compiling files containing mixed MIPS16 and non-MIPS16 code, the +-preprocessor symbol @code{__mips16} reflects the setting on the command line, +-not that within individual functions. Mixed MIPS16 and non-MIPS16 code +-may interact badly with some GCC extensions such as @code{__builtin_apply} +-(@pxref{Constructing Calls}). +@smallexample +void f () __attribute__ ((interrupt ("IRQ"))); +@end smallexample -+ + +-@item micromips, MIPS +-@itemx nomicromips, MIPS +-@cindex @code{micromips} function attribute +-@cindex @code{nomicromips} function attribute +@noindent +Permissible values for this parameter are: @code{IRQ}, @code{FIQ}, +@code{SWI}, @code{ABORT} and @code{UNDEF}. -+ + +-On MIPS targets, you can use the @code{micromips} and @code{nomicromips} +-function attributes to locally select or turn off microMIPS code generation. 
+-A function with the @code{micromips} attribute is emitted as microMIPS code, +-while microMIPS code generation is disabled for functions with the +-@code{nomicromips} attribute. These attributes override the +-@option{-mmicromips} and @option{-mno-micromips} options on the command line +-(@pxref{MIPS Options}). +On ARMv7-M the interrupt type is ignored, and the attribute means the function +may be called with a word-aligned stack pointer. -+ + +-When compiling files containing mixed microMIPS and non-microMIPS code, the +-preprocessor symbol @code{__mips_micromips} reflects the setting on the +-command line, +-not that within individual functions. Mixed microMIPS and non-microMIPS code +-may interact badly with some GCC extensions such as @code{__builtin_apply} +-(@pxref{Constructing Calls}). +@item isr +@cindex @code{isr} function attribute, ARM +Use this attribute on ARM to write Interrupt Service Routines. This is an +alias to the @code{interrupt} attribute above. -+ + +-@item model (@var{model-name}) +-@cindex @code{model} function attribute, M32R/D +-@cindex function addressability on the M32R/D +@item long_call +@itemx short_call +@cindex @code{long_call} function attribute, ARM @@ -1544,7 +1698,11 @@ +calling sequence. The @code{short_call} attribute always places +the offset to the function from the call site into the @samp{BL} +instruction directly. -+ + +-On the M32R/D, use this attribute to set the addressability of an +-object, and of the code generated for a function. The identifier +-@var{model-name} is one of @code{small}, @code{medium}, or +-@code{large}, representing each of the code models. +@item naked +@cindex @code{naked} function attribute, ARM +This attribute allows the compiler to construct the @@ -1555,35 +1713,105 @@ +(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of +basic @code{asm} and C code may appear to work, they cannot be +depended upon to work reliably and are not supported. -+ + +-Small model objects live in the lower 16MB of memory (so that their +-addresses can be loaded with the @code{ld24} instruction), and are +-callable with the @code{bl} instruction. +@item pcs +@cindex @code{pcs} function attribute, ARM -+ + +-Medium model objects may live anywhere in the 32-bit address space (the +-compiler generates @code{seth/add3} instructions to load their addresses), +-and are callable with the @code{bl} instruction. +The @code{pcs} attribute can be used to control the calling convention +used for a function on ARM. The attribute takes an argument that specifies +the calling convention to use. -+ + +-Large model objects may live anywhere in the 32-bit address space (the +-compiler generates @code{seth/add3} instructions to load their addresses), +-and may not be reachable with the @code{bl} instruction (the compiler +-generates the much slower @code{seth/add3/jl} instruction sequence). +When compiling using the AAPCS ABI (or a variant of it) then valid +values for the argument are @code{"aapcs"} and @code{"aapcs-vfp"}. In +order to use a variant other than @code{"aapcs"} then the compiler must +be permitted to use the appropriate co-processor registers (i.e., the +VFP registers must be available in order to use @code{"aapcs-vfp"}). +For example, -+ + +-@item ms_abi +-@itemx sysv_abi +-@cindex @code{ms_abi} function attribute, x86 +-@cindex @code{sysv_abi} function attribute, x86 +@smallexample +/* Argument passed in r0, and result returned in r0+r1. 
*/ +double f2d (float) __attribute__((pcs("aapcs"))); - @end smallexample ++@end smallexample +-On 32-bit and 64-bit x86 targets, you can use an ABI attribute +-to indicate which calling convention should be used for a function. The +-@code{ms_abi} attribute tells the compiler to use the Microsoft ABI, +-while the @code{sysv_abi} attribute tells the compiler to use the ABI +-used on GNU/Linux and other systems. The default is to use the Microsoft ABI +-when targeting Windows. On all other systems, the default is the x86/AMD ABI. +Variadic functions always use the @code{"aapcs"} calling convention and +the compiler rejects attempts to specify an alternative. + +-Note, the @code{ms_abi} attribute for Microsoft Windows 64-bit targets currently +-requires the @option{-maccumulate-outgoing-args} option. ++@item target (@var{options}) ++@cindex @code{target} function attribute ++As discussed in @ref{Common Function Attributes}, this attribute ++allows specification of target-specific compilation options. + +-@item callee_pop_aggregate_return (@var{number}) +-@cindex @code{callee_pop_aggregate_return} function attribute, x86 ++On ARM, the following options are allowed: + +-On x86-32 targets, you can use this attribute to control how +-aggregates are returned in memory. If the caller is responsible for +-popping the hidden pointer together with the rest of the arguments, specify +-@var{number} equal to zero. If callee is responsible for popping the +-hidden pointer, specify @var{number} equal to one. ++@table @samp ++@item thumb ++@cindex @code{target("thumb")} function attribute, ARM ++Force code generation in the Thumb (T16/T32) ISA, depending on the ++architecture level. ++ ++@item arm ++@cindex @code{target("arm")} function attribute, ARM ++Force code generation in the ARM (A32) ISA. +@end table -+ + +-The default x86-32 ABI assumes that the callee pops the +-stack for hidden pointer. However, on x86-32 Microsoft Windows targets, +-the compiler assumes that the +-caller pops the stack for hidden pointer. ++Functions from different modes can be inlined in the caller's mode. + +-@item ms_hook_prologue +-@cindex @code{ms_hook_prologue} function attribute, x86 ++@end table + +-On 32-bit and 64-bit x86 targets, you can use +-this function attribute to make GCC generate the ``hot-patching'' function +-prologue used in Win32 API functions in Microsoft Windows XP Service Pack 2 +-and newer. +@node AVR Function Attributes +@subsection AVR Function Attributes -+ + +-@item hotpatch (@var{halfwords-before-function-label},@var{halfwords-after-function-label}) +-@cindex @code{hotpatch} function attribute, S/390 +These function attributes are supported by the AVR back end: -+ + +-On S/390 System z targets, you can use this function attribute to +-make GCC generate a ``hot-patching'' function prologue. If the +-@option{-mhotpatch=} command-line option is used at the same time, +-the @code{hotpatch} attribute takes precedence. The first of the +-two arguments specifies the number of halfwords to be added before +-the function label. A second argument can be used to specify the +-number of halfwords to be added after the function label. For +-both arguments the maximum allowed value is 1000000. +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, AVR @@ -1591,7 +1819,8 @@ +that the specified function is an interrupt handler. The compiler generates +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. 
-+ + +-If both arguments are zero, hotpatching is disabled. +On the AVR, the hardware globally disables interrupts when an +interrupt is executed. The first instruction of an interrupt handler +declared with this attribute is a @code{SEI} instruction to @@ -1599,18 +1828,33 @@ +that does not insert a @code{SEI} instruction. If both @code{signal} and +@code{interrupt} are specified for the same function, @code{signal} +is silently ignored. -+ -+@item naked -+@cindex @code{naked} function attribute, AVR + + @item naked +-@cindex @code{naked} function attribute, ARM + @cindex @code{naked} function attribute, AVR +-@cindex @code{naked} function attribute, MCORE +-@cindex @code{naked} function attribute, MSP430 +-@cindex @code{naked} function attribute, NDS32 +-@cindex @code{naked} function attribute, RL78 +-@cindex @code{naked} function attribute, RX +-@cindex @code{naked} function attribute, SPU +-@cindex function without prologue/epilogue code +-This attribute is available on the ARM, AVR, MCORE, MSP430, NDS32, +-RL78, RX and SPU ports. It allows the compiler to construct the +This attribute allows the compiler to construct the -+requisite function declaration, while allowing the body of the -+function to be assembly code. The specified function will not have -+prologue/epilogue sequences generated by the compiler. Only basic -+@code{asm} statements can safely be included in naked functions -+(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of -+basic @code{asm} and C code may appear to work, they cannot be -+depended upon to work reliably and are not supported. -+ + requisite function declaration, while allowing the body of the + function to be assembly code. The specified function will not have + prologue/epilogue sequences generated by the compiler. Only basic +@@ -3393,12 +3441,108 @@ prologue/epilogue sequences generated by the compiler. Only basic + basic @code{asm} and C code may appear to work, they cannot be + depended upon to work reliably and are not supported. + +-@item near +-@cindex @code{near} function attribute, MeP +-@cindex functions that do not handle memory bank switching on 68HC11/68HC12 +-On MeP targets this attribute causes the compiler to assume the called +-function is close enough to use the normal calling convention, +-overriding the @option{-mtf} command-line option. +@item OS_main +@itemx OS_task +@cindex @code{OS_main} function attribute, AVR @@ -1674,313 +1918,65 @@ +exit sequences suitable for use in an exception handler when this +attribute is present. + - @item interrupt_handler - @cindex @code{interrupt_handler} function attribute, Blackfin --@cindex @code{interrupt_handler} function attribute, m68k --@cindex @code{interrupt_handler} function attribute, H8/300 --@cindex @code{interrupt_handler} function attribute, SH --Use this attribute on the Blackfin, m68k, H8/300, H8/300H, H8S, and SH to ++@item interrupt_handler ++@cindex @code{interrupt_handler} function attribute, Blackfin +Use this attribute to - indicate that the specified function is an interrupt handler. The compiler - generates function entry and exit sequences suitable for use in an - interrupt handler when this attribute is present. - --@item interrupt_thread --@cindex @code{interrupt_thread} function attribute, fido --Use this attribute on fido, a subarchitecture of the m68k, to indicate --that the specified function is an interrupt handler that is designed --to run as a thread. 
The compiler omits generate prologue/epilogue --sequences and replaces the return instruction with a @code{sleep} --instruction. This attribute is available only on fido. -- --@item isr --@cindex @code{isr} function attribute, ARM --Use this attribute on ARM to write Interrupt Service Routines. This is an --alias to the @code{interrupt} attribute above. -- - @item kspisusp - @cindex @code{kspisusp} function attribute, Blackfin - @cindex User stack pointer in interrupts on the Blackfin -@@ -3147,258 +3505,22 @@ or caller uses inlined PLT. - - @item l2 - @cindex @code{l2} function attribute, Blackfin --On the Blackfin, this attribute specifies a function to be placed into L2 ++indicate that the specified function is an interrupt handler. The compiler ++generates function entry and exit sequences suitable for use in an ++interrupt handler when this attribute is present. ++ ++@item kspisusp ++@cindex @code{kspisusp} function attribute, Blackfin ++@cindex User stack pointer in interrupts on the Blackfin ++When used together with @code{interrupt_handler}, @code{exception_handler} ++or @code{nmi_handler}, code is generated to load the stack pointer ++from the USP register in the function prologue. ++ ++@item l1_text ++@cindex @code{l1_text} function attribute, Blackfin ++This attribute specifies a function to be placed into L1 Instruction ++SRAM@. The function is put into a specific section named @code{.l1.text}. ++With @option{-mfdpic}, function calls with a such function as the callee ++or caller uses inlined PLT. ++ ++@item l2 ++@cindex @code{l2} function attribute, Blackfin +This attribute specifies a function to be placed into L2 - SRAM. The function is put into a specific section named --@code{.l1.text}. With @option{-mfdpic}, callers of such functions use ++SRAM. The function is put into a specific section named +@code{.l2.text}. With @option{-mfdpic}, callers of such functions use - an inlined PLT. - --@item leaf --@cindex @code{leaf} function attribute --Calls to external functions with this attribute must return to the current --compilation unit only by return or by exception handling. In particular, leaf --functions are not allowed to call callback function passed to it from the current --compilation unit or directly call functions exported by the unit or longjmp --into the unit. Leaf function might still call functions from other compilation --units and thus they are not necessarily leaf in the sense that they contain no --function calls at all. -- --The attribute is intended for library functions to improve dataflow analysis. --The compiler takes the hint that any data not escaping the current compilation unit can --not be used or modified by the leaf function. For example, the @code{sin} function --is a leaf function, but @code{qsort} is not. -- --Note that leaf functions might invoke signals and signal handlers might be --defined in the current compilation unit and use static variables. The only --compliant way to write such a signal handler is to declare such variables --@code{volatile}. -- --The attribute has no effect on functions defined within the current compilation --unit. This is to allow easy merging of multiple compilation units into one, --for example, by using the link-time optimization. For this reason the --attribute is not allowed on types to annotate indirect calls. 
-- --@item long_call --@itemx medium_call --@itemx short_call --@cindex @code{long_call} function attribute, ARC --@cindex @code{long_call} function attribute, ARM --@cindex @code{long_call} function attribute, Epiphany --@cindex @code{medium_call} function attribute, ARC --@cindex @code{short_call} function attribute, ARC --@cindex @code{short_call} function attribute, ARM --@cindex @code{short_call} function attribute, Epiphany --@cindex indirect calls, ARC --@cindex indirect calls, ARM --@cindex indirect calls, Epiphany --These attributes specify how a particular function is called on --ARC, ARM and Epiphany - with @code{medium_call} being specific to ARC. --These attributes override the --@option{-mlong-calls} (@pxref{ARM Options} and @ref{ARC Options}) --and @option{-mmedium-calls} (@pxref{ARC Options}) --command-line switches and @code{#pragma long_calls} settings. For ARM, the --@code{long_call} attribute indicates that the function might be far --away from the call site and require a different (more expensive) --calling sequence. The @code{short_call} attribute always places --the offset to the function from the call site into the @samp{BL} --instruction directly. -- --For ARC, a function marked with the @code{long_call} attribute is --always called using register-indirect jump-and-link instructions, --thereby enabling the called function to be placed anywhere within the --32-bit address space. A function marked with the @code{medium_call} --attribute will always be close enough to be called with an unconditional --branch-and-link instruction, which has a 25-bit offset from --the call site. A function marked with the @code{short_call} --attribute will always be close enough to be called with a conditional --branch-and-link instruction, which has a 21-bit offset from --the call site. -- - @item longcall - @itemx shortcall - @cindex indirect calls, Blackfin --@cindex indirect calls, PowerPC - @cindex @code{longcall} function attribute, Blackfin --@cindex @code{longcall} function attribute, PowerPC - @cindex @code{shortcall} function attribute, Blackfin --@cindex @code{shortcall} function attribute, PowerPC --On Blackfin and PowerPC, the @code{longcall} attribute ++an inlined PLT. ++ ++@item longcall ++@itemx shortcall ++@cindex indirect calls, Blackfin ++@cindex @code{longcall} function attribute, Blackfin ++@cindex @code{shortcall} function attribute, Blackfin +The @code{longcall} attribute - indicates that the function might be far away from the call site and - require a different (more expensive) calling sequence. The - @code{shortcall} attribute indicates that the function is always close - enough for the shorter calling sequence to be used. These attributes --override both the @option{-mlongcall} switch and, on the RS/6000 and --PowerPC, the @code{#pragma longcall} setting. -- --@xref{RS/6000 and PowerPC Options}, for more information on whether long --calls are necessary. -- --@item long_call --@itemx near --@itemx far --@cindex indirect calls, MIPS --@cindex @code{long_call} function attribute, MIPS --@cindex @code{near} function attribute, MIPS --@cindex @code{far} function attribute, MIPS --These attributes specify how a particular function is called on MIPS@. --The attributes override the @option{-mlong-calls} (@pxref{MIPS Options}) --command-line switch. The @code{long_call} and @code{far} attributes are --synonyms, and cause the compiler to always call --the function by first loading its address into a register, and then using --the contents of that register. 
The @code{near} attribute has the opposite --effect; it specifies that non-PIC calls should be made using the more --efficient @code{jal} instruction. -- --@item malloc --@cindex @code{malloc} function attribute --This tells the compiler that a function is @code{malloc}-like, i.e., --that the pointer @var{P} returned by the function cannot alias any --other pointer valid when the function returns, and moreover no --pointers to valid objects occur in any storage addressed by @var{P}. -- --Using this attribute can improve optimization. Functions like --@code{malloc} and @code{calloc} have this property because they return --a pointer to uninitialized or zeroed-out storage. However, functions --like @code{realloc} do not have this property, as they can return a --pointer to storage containing pointers. -- --@item mips16 --@itemx nomips16 --@cindex @code{mips16} function attribute, MIPS --@cindex @code{nomips16} function attribute, MIPS -- --On MIPS targets, you can use the @code{mips16} and @code{nomips16} --function attributes to locally select or turn off MIPS16 code generation. --A function with the @code{mips16} attribute is emitted as MIPS16 code, --while MIPS16 code generation is disabled for functions with the --@code{nomips16} attribute. These attributes override the --@option{-mips16} and @option{-mno-mips16} options on the command line --(@pxref{MIPS Options}). -- --When compiling files containing mixed MIPS16 and non-MIPS16 code, the --preprocessor symbol @code{__mips16} reflects the setting on the command line, --not that within individual functions. Mixed MIPS16 and non-MIPS16 code --may interact badly with some GCC extensions such as @code{__builtin_apply} --(@pxref{Constructing Calls}). -- --@item micromips, MIPS --@itemx nomicromips, MIPS --@cindex @code{micromips} function attribute --@cindex @code{nomicromips} function attribute ++indicates that the function might be far away from the call site and ++require a different (more expensive) calling sequence. The ++@code{shortcall} attribute indicates that the function is always close ++enough for the shorter calling sequence to be used. These attributes ++override the @option{-mlongcall} switch. + + @item nesting + @cindex @code{nesting} function attribute, Blackfin +@@ -3415,410 +3559,349 @@ is an NMI handler. The compiler generates function entry and + exit sequences suitable for use in an NMI handler when this + attribute is present. + +-@item nocompression +-@cindex @code{nocompression} function attribute, MIPS +-On MIPS targets, you can use the @code{nocompression} function attribute +-to locally turn off MIPS16 and microMIPS code generation. This attribute +-overrides the @option{-mips16} and @option{-mmicromips} options on the +-command line (@pxref{MIPS Options}). - --On MIPS targets, you can use the @code{micromips} and @code{nomicromips} --function attributes to locally select or turn off microMIPS code generation. --A function with the @code{micromips} attribute is emitted as microMIPS code, --while microMIPS code generation is disabled for functions with the --@code{nomicromips} attribute. These attributes override the --@option{-mmicromips} and @option{-mno-micromips} options on the command line --(@pxref{MIPS Options}). -- --When compiling files containing mixed microMIPS and non-microMIPS code, the --preprocessor symbol @code{__mips_micromips} reflects the setting on the --command line, --not that within individual functions. 
Mixed microMIPS and non-microMIPS code --may interact badly with some GCC extensions such as @code{__builtin_apply} --(@pxref{Constructing Calls}). -- --@item model (@var{model-name}) --@cindex @code{model} function attribute, M32R/D --@cindex function addressability on the M32R/D -- --On the M32R/D, use this attribute to set the addressability of an --object, and of the code generated for a function. The identifier --@var{model-name} is one of @code{small}, @code{medium}, or --@code{large}, representing each of the code models. -- --Small model objects live in the lower 16MB of memory (so that their --addresses can be loaded with the @code{ld24} instruction), and are --callable with the @code{bl} instruction. -- --Medium model objects may live anywhere in the 32-bit address space (the --compiler generates @code{seth/add3} instructions to load their addresses), --and are callable with the @code{bl} instruction. -- --Large model objects may live anywhere in the 32-bit address space (the --compiler generates @code{seth/add3} instructions to load their addresses), --and may not be reachable with the @code{bl} instruction (the compiler --generates the much slower @code{seth/add3/jl} instruction sequence). -- --@item ms_abi --@itemx sysv_abi --@cindex @code{ms_abi} function attribute, x86 --@cindex @code{sysv_abi} function attribute, x86 -- --On 32-bit and 64-bit x86 targets, you can use an ABI attribute --to indicate which calling convention should be used for a function. The --@code{ms_abi} attribute tells the compiler to use the Microsoft ABI, --while the @code{sysv_abi} attribute tells the compiler to use the ABI --used on GNU/Linux and other systems. The default is to use the Microsoft ABI --when targeting Windows. On all other systems, the default is the x86/AMD ABI. -- --Note, the @code{ms_abi} attribute for Microsoft Windows 64-bit targets currently --requires the @option{-maccumulate-outgoing-args} option. -- --@item callee_pop_aggregate_return (@var{number}) --@cindex @code{callee_pop_aggregate_return} function attribute, x86 -- --On x86-32 targets, you can use this attribute to control how --aggregates are returned in memory. If the caller is responsible for --popping the hidden pointer together with the rest of the arguments, specify --@var{number} equal to zero. If callee is responsible for popping the --hidden pointer, specify @var{number} equal to one. -- --The default x86-32 ABI assumes that the callee pops the --stack for hidden pointer. However, on x86-32 Microsoft Windows targets, --the compiler assumes that the --caller pops the stack for hidden pointer. -- --@item ms_hook_prologue --@cindex @code{ms_hook_prologue} function attribute, x86 -- --On 32-bit and 64-bit x86 targets, you can use --this function attribute to make GCC generate the ``hot-patching'' function --prologue used in Win32 API functions in Microsoft Windows XP Service Pack 2 --and newer. -- --@item hotpatch (@var{halfwords-before-function-label},@var{halfwords-after-function-label}) --@cindex @code{hotpatch} function attribute, S/390 -- --On S/390 System z targets, you can use this function attribute to --make GCC generate a ``hot-patching'' function prologue. If the --@option{-mhotpatch=} command-line option is used at the same time, --the @code{hotpatch} attribute takes precedence. The first of the --two arguments specifies the number of halfwords to be added before --the function label. A second argument can be used to specify the --number of halfwords to be added after the function label. 
For --both arguments the maximum allowed value is 1000000. -- --If both arguments are zero, hotpatching is disabled. -- --@item naked --@cindex @code{naked} function attribute, ARM --@cindex @code{naked} function attribute, AVR --@cindex @code{naked} function attribute, MCORE --@cindex @code{naked} function attribute, MSP430 --@cindex @code{naked} function attribute, NDS32 --@cindex @code{naked} function attribute, RL78 --@cindex @code{naked} function attribute, RX --@cindex @code{naked} function attribute, SPU --@cindex function without prologue/epilogue code --This attribute is available on the ARM, AVR, MCORE, MSP430, NDS32, --RL78, RX and SPU ports. It allows the compiler to construct the --requisite function declaration, while allowing the body of the --function to be assembly code. The specified function will not have --prologue/epilogue sequences generated by the compiler. Only basic --@code{asm} statements can safely be included in naked functions --(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of --basic @code{asm} and C code may appear to work, they cannot be --depended upon to work reliably and are not supported. -- --@item near --@cindex @code{near} function attribute, MeP --@cindex functions that do not handle memory bank switching on 68HC11/68HC12 --On MeP targets this attribute causes the compiler to assume the called --function is close enough to use the normal calling convention, --overriding the @option{-mtf} command-line option. -+override the @option{-mlongcall} switch. - - @item nesting - @cindex @code{nesting} function attribute, Blackfin -@@ -3415,410 +3537,349 @@ is an NMI handler. The compiler generates function entry and - exit sequences suitable for use in an NMI handler when this - attribute is present. - --@item nocompression --@cindex @code{nocompression} function attribute, MIPS --On MIPS targets, you can use the @code{nocompression} function attribute --to locally turn off MIPS16 and microMIPS code generation. This attribute --overrides the @option{-mips16} and @option{-mmicromips} options on the --command line (@pxref{MIPS Options}). -- --@item no_instrument_function --@cindex @code{no_instrument_function} function attribute --@opindex finstrument-functions --If @option{-finstrument-functions} is given, profiling function calls are --generated at entry and exit of most user-compiled functions. --Functions with this attribute are not so instrumented. +-@item no_instrument_function +-@cindex @code{no_instrument_function} function attribute +-@opindex finstrument-functions +-If @option{-finstrument-functions} is given, profiling function calls are +-generated at entry and exit of most user-compiled functions. +-Functions with this attribute are not so instrumented. - -@item no_split_stack -@cindex @code{no_split_stack} function attribute @@ -2582,46 +2578,37 @@ +On MeP targets, this attribute causes the compiler to emit +instructions to disable interrupts for the duration of the given +function. - --@item resbank --@cindex @code{resbank} function attribute, SH --On the SH2A target, this attribute enables the high-speed register --saving and restoration using a register bank for @code{interrupt_handler} --routines. Saving to the bank is performed automatically after the CPU --accepts an interrupt that uses a register bank. ++ +@item interrupt +@cindex @code{interrupt} function attribute, MeP +Use this attribute to indicate +that the specified function is an interrupt handler. 
The compiler generates +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. - --The nineteen 32-bit registers comprising general register R0 to R14, --control register GBR, and system registers MACH, MACL, and PR and the --vector table address offset are saved into a register bank. Register --banks are stacked in first-in last-out (FILO) sequence. Restoration --from the bank is executed by issuing a RESBANK instruction. ++ +@item near +@cindex @code{near} function attribute, MeP +This attribute causes the compiler to assume the called +function is close enough to use the normal calling convention, +overriding the @option{-mtf} command-line option. --@item returns_twice --@cindex @code{returns_twice} function attribute --The @code{returns_twice} attribute tells the compiler that a function may --return more than one time. The compiler ensures that all registers --are dead before calling such a function and emits a warning about --the variables that may be clobbered after the second return from the --function. Examples of such functions are @code{setjmp} and @code{vfork}. --The @code{longjmp}-like counterpart of such function, if any, might need --to be marked with the @code{noreturn} attribute. +-@item resbank +-@cindex @code{resbank} function attribute, SH +-On the SH2A target, this attribute enables the high-speed register +-saving and restoration using a register bank for @code{interrupt_handler} +-routines. Saving to the bank is performed automatically after the CPU +-accepts an interrupt that uses a register bank. +@item far +@cindex @code{far} function attribute, MeP +On MeP targets this causes the compiler to use a calling convention +that assumes the called function is too far away for the built-in +addressing modes. -+ + +-The nineteen 32-bit registers comprising general register R0 to R14, +-control register GBR, and system registers MACH, MACL, and PR and the +-vector table address offset are saved into a register bank. Register +-banks are stacked in first-in last-out (FILO) sequence. Restoration +-from the bank is executed by issuing a RESBANK instruction. +@item vliw +@cindex @code{vliw} function attribute, MeP +The @code{vliw} attribute tells the compiler to emit @@ -2630,6 +2617,18 @@ +and enabled through command-line options. +@end table +-@item returns_twice +-@cindex @code{returns_twice} function attribute +-The @code{returns_twice} attribute tells the compiler that a function may +-return more than one time. The compiler ensures that all registers +-are dead before calling such a function and emits a warning about +-the variables that may be clobbered after the second return from the +-function. Examples of such functions are @code{setjmp} and @code{vfork}. +-The @code{longjmp}-like counterpart of such function, if any, might need +-to be marked with the @code{noreturn} attribute. ++@node MicroBlaze Function Attributes ++@subsection MicroBlaze Function Attributes + -@item saveall -@cindex @code{saveall} function attribute, Blackfin -@cindex @code{saveall} function attribute, H8/300 @@ -2637,9 +2636,6 @@ -Use this attribute on the Blackfin, H8/300, H8/300H, and H8S to indicate that -all registers except the stack pointer should be saved in the prologue -regardless of whether they are used or not. -+@node MicroBlaze Function Attributes -+@subsection MicroBlaze Function Attributes -+ +These function attributes are supported on MicroBlaze targets: +@table @code @@ -2650,7 +2646,7 @@ an interrupt handler. 
All volatile registers (in addition to non-volatile registers) are saved in the function prologue. If the function is a leaf function, only volatiles used by the function are saved. A normal function -@@ -3827,7 +3888,7 @@ return is generated instead of a return from interrupt. +@@ -3827,7 +3910,7 @@ return is generated instead of a return from interrupt. @item break_handler @cindex @code{break_handler} function attribute, MicroBlaze @cindex break handler functions @@ -2659,7 +2655,7 @@ the specified function is a break handler. The compiler generates function entry and exit sequences suitable for use in an break handler when this attribute is present. The return from @code{break_handler} is done through -@@ -3836,290 +3897,404 @@ the @code{rtbd} instead of @code{rtsd}. +@@ -3836,290 +3919,404 @@ the @code{rtbd} instead of @code{rtsd}. @smallexample void f () __attribute__ ((break_handler)); @end smallexample @@ -2895,11 +2891,9 @@ +Return using the @code{deret} instruction. Interrupt handlers that don't +have this attribute return using @code{eret} instead. +@end table - ++ +You can use any combination of these attributes, as shown below: - @smallexample --int core2_func (void) __attribute__ ((__target__ ("arch=core2"))); --int sse3_func (void) __attribute__ ((__target__ ("sse3"))); ++@smallexample +void __attribute__ ((interrupt)) v0 (); +void __attribute__ ((interrupt, use_shadow_register_set)) v1 (); +void __attribute__ ((interrupt, keep_interrupts_masked)) v2 (); @@ -2913,11 +2907,8 @@ +void __attribute__ ((interrupt, use_shadow_register_set, + keep_interrupts_masked, + use_debug_exception_return)) v7 (); - @end smallexample - --You can either use multiple --strings to specify multiple options, or separate the options --with a comma (@samp{,}). ++@end smallexample ++ +@item long_call +@itemx near +@itemx far @@ -2934,15 +2925,18 @@ +effect; it specifies that non-PIC calls should be made using the more +efficient @code{jal} instruction. --The @code{target} attribute is presently implemented for --x86, PowerPC, and Nios II targets only. --The options supported are specific to each target. +-@smallexample +-int core2_func (void) __attribute__ ((__target__ ("arch=core2"))); +-int sse3_func (void) __attribute__ ((__target__ ("sse3"))); +-@end smallexample +@item mips16 +@itemx nomips16 +@cindex @code{mips16} function attribute, MIPS +@cindex @code{nomips16} function attribute, MIPS --On the x86, the following options are allowed: +-You can either use multiple +-strings to specify multiple options, or separate the options +-with a comma (@samp{,}). +On MIPS targets, you can use the @code{mips16} and @code{nomips16} +function attributes to locally select or turn off MIPS16 code generation. +A function with the @code{mips16} attribute is emitted as MIPS16 code, @@ -2950,13 +2944,17 @@ +@code{nomips16} attribute. These attributes override the +@option{-mips16} and @option{-mno-mips16} options on the command line +(@pxref{MIPS Options}). -+ + +-The @code{target} attribute is presently implemented for +-x86, PowerPC, and Nios II targets only. +-The options supported are specific to each target. +When compiling files containing mixed MIPS16 and non-MIPS16 code, the +preprocessor symbol @code{__mips16} reflects the setting on the command line, +not that within individual functions. Mixed MIPS16 and non-MIPS16 code +may interact badly with some GCC extensions such as @code{__builtin_apply} +(@pxref{Constructing Calls}). 
-+ + +-On the x86, the following options are allowed: +@item micromips, MIPS +@itemx nomicromips, MIPS +@cindex @code{micromips} function attribute @@ -3293,16 +3291,12 @@ On the PowerPC, the following options are allowed: -@@ -4293,35 +4468,193 @@ compilation tunes for the @var{CPU} architecture, and not the +@@ -4293,666 +4490,535 @@ compilation tunes for the @var{CPU} architecture, and not the default tuning specified on the command line. @end table -When compiling for Nios II, the following options are allowed: -+On the PowerPC, the inliner does not inline a -+function that has different target options than the caller, unless the -+callee has a subset of the target options of the caller. -+@end table - +- -@table @samp -@item custom-@var{insn}=@var{N} -@itemx no-custom-@var{insn} @@ -3316,66 +3310,229 @@ -@option{-mcustom-@var{insn}=@var{N}} and @option{-mno-custom-@var{insn}} -command-line options, and support the same set of @var{insn} keywords. -@xref{Nios II Options}, for more information. -+@node RL78 Function Attributes -+@subsection RL78 Function Attributes - +- -@item custom-fpu-cfg=@var{name} -@cindex @code{target("custom-fpu-cfg=@var{name}")} function attribute, Nios II -This attribute corresponds to the @option{-mcustom-fpu-cfg=@var{name}} -command-line option, to select a predefined set of custom instructions -named @var{name}. -@xref{Nios II Options}, for more information. -+These function attributes are supported by the RL78 back end: -+ -+@table @code -+@item interrupt -+@itemx brk_interrupt -+@cindex @code{interrupt} function attribute, RL78 -+@cindex @code{brk_interrupt} function attribute, RL78 -+These attributes indicate -+that the specified function is an interrupt handler. The compiler generates -+function entry and exit sequences suitable for use in an interrupt handler -+when this attribute is present. -+ -+Use @code{brk_interrupt} instead of @code{interrupt} for -+handlers intended to be used with the @code{BRK} opcode (i.e.@: those -+that must end with @code{RETB} instead of @code{RETI}). -+ -+@item naked -+@cindex @code{naked} function attribute, RL78 -+This attribute allows the compiler to construct the -+requisite function declaration, while allowing the body of the -+function to be assembly code. The specified function will not have -+prologue/epilogue sequences generated by the compiler. Only basic -+@code{asm} statements can safely be included in naked functions -+(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of -+basic @code{asm} and C code may appear to work, they cannot be -+depended upon to work reliably and are not supported. - @end table - +-@end table +- -On the x86 and PowerPC back ends, the inliner does not inline a --function that has different target options than the caller, unless the ++On the PowerPC, the inliner does not inline a + function that has different target options than the caller, unless the -callee has a subset of the target options of the caller. For example -a function declared with @code{target("sse3")} can inline a function -with @code{target("sse2")}, since @code{-msse3} implies @code{-msse2}. -+@node RX Function Attributes -+@subsection RX Function Attributes -+ -+These function attributes are supported by the RX back end: -+ -+@table @code -+@item fast_interrupt -+@cindex @code{fast_interrupt} function attribute, RX -+Use this attribute on the RX port to indicate that the specified -+function is a fast interrupt handler. 
This is just like the -+@code{interrupt} attribute, except that @code{freit} is used to return -+instead of @code{reit}. -+ -+@item interrupt -+@cindex @code{interrupt} function attribute, RX -+Use this attribute to indicate -+that the specified function is an interrupt handler. The compiler generates -+function entry and exit sequences suitable for use in an interrupt handler +- +-@item trap_exit +-@cindex @code{trap_exit} function attribute, SH +-Use this attribute on the SH for an @code{interrupt_handler} to return using +-@code{trapa} instead of @code{rte}. This attribute expects an integer +-argument specifying the trap number to be used. +- +-@item trapa_handler +-@cindex @code{trapa_handler} function attribute, SH +-On SH targets this function attribute is similar to @code{interrupt_handler} +-but it does not save and restore all registers. +- +-@item unused +-@cindex @code{unused} function attribute +-This attribute, attached to a function, means that the function is meant +-to be possibly unused. GCC does not produce a warning for this +-function. +- +-@item used +-@cindex @code{used} function attribute +-This attribute, attached to a function, means that code must be emitted +-for the function even if it appears that the function is not referenced. +-This is useful, for example, when the function is referenced only in +-inline assembly. +- +-When applied to a member function of a C++ class template, the +-attribute also means that the function is instantiated if the +-class itself is instantiated. +- +-@item vector +-@cindex @code{vector} function attribute, RX +-This RX attribute is similar to the @code{interrupt} attribute, including its +-parameters, but does not make the function an interrupt-handler type +-function (i.e. it retains the normal C function calling ABI). See the +-@code{interrupt} attribute for a description of its arguments. +- +-@item version_id +-@cindex @code{version_id} function attribute, IA-64 +-This IA-64 HP-UX attribute, attached to a global variable or function, renames a +-symbol to contain a version string, thus allowing for function level +-versioning. HP-UX system header files may use function level versioning +-for some system calls. +- +-@smallexample +-extern int foo () __attribute__((version_id ("20040821"))); +-@end smallexample +- +-@noindent +-Calls to @var{foo} are mapped to calls to @var{foo@{20040821@}}. +- +-@item visibility ("@var{visibility_type}") +-@cindex @code{visibility} function attribute +-This attribute affects the linkage of the declaration to which it is attached. +-There are four supported @var{visibility_type} values: default, +-hidden, protected or internal visibility. +- +-@smallexample +-void __attribute__ ((visibility ("protected"))) +-f () @{ /* @r{Do something.} */; @} +-int i __attribute__ ((visibility ("hidden"))); +-@end smallexample +- +-The possible values of @var{visibility_type} correspond to the +-visibility settings in the ELF gABI. +- +-@table @dfn +-@c keep this list of visibilities in alphabetical order. +- +-@item default +-Default visibility is the normal case for the object file format. +-This value is available for the visibility attribute to override other +-options that may change the assumed visibility of entities. +- +-On ELF, default visibility means that the declaration is visible to other +-modules and, in shared libraries, means that the declared entity may be +-overridden. +- +-On Darwin, default visibility means that the declaration is visible to +-other modules. 
+- +-Default visibility corresponds to ``external linkage'' in the language. +- +-@item hidden +-Hidden visibility indicates that the entity declared has a new +-form of linkage, which we call ``hidden linkage''. Two +-declarations of an object with hidden linkage refer to the same object +-if they are in the same shared object. +- +-@item internal +-Internal visibility is like hidden visibility, but with additional +-processor specific semantics. Unless otherwise specified by the +-psABI, GCC defines internal visibility to mean that a function is +-@emph{never} called from another module. Compare this with hidden +-functions which, while they cannot be referenced directly by other +-modules, can be referenced indirectly via function pointers. By +-indicating that a function cannot be called from outside the module, +-GCC may for instance omit the load of a PIC register since it is known +-that the calling function loaded the correct value. +- +-@item protected +-Protected visibility is like default visibility except that it +-indicates that references within the defining module bind to the +-definition in that module. That is, the declared entity cannot be +-overridden by another module. +- ++callee has a subset of the target options of the caller. + @end table + +-All visibilities are supported on many, but not all, ELF targets +-(supported when the assembler supports the @samp{.visibility} +-pseudo-op). Default visibility is supported everywhere. Hidden +-visibility is supported on Darwin targets. +- +-The visibility attribute should be applied only to declarations that +-would otherwise have external linkage. The attribute should be applied +-consistently, so that the same entity should not be declared with +-different settings of the attribute. +- +-In C++, the visibility attribute applies to types as well as functions +-and objects, because in C++ types have linkage. A class must not have +-greater visibility than its non-static data member types and bases, +-and class members default to the visibility of their class. Also, a +-declaration without explicit visibility is limited to the visibility +-of its type. ++@node RL78 Function Attributes ++@subsection RL78 Function Attributes + +-In C++, you can mark member functions and static member variables of a +-class with the visibility attribute. This is useful if you know a +-particular method or static member variable should only be used from +-one shared object; then you can mark it hidden while the rest of the +-class has default visibility. Care must be taken to avoid breaking +-the One Definition Rule; for example, it is usually not useful to mark +-an inline method as hidden without marking the whole class as hidden. ++These function attributes are supported by the RL78 back end: + +-A C++ namespace declaration can also have the visibility attribute. ++@table @code ++@item interrupt ++@itemx brk_interrupt ++@cindex @code{interrupt} function attribute, RL78 ++@cindex @code{brk_interrupt} function attribute, RL78 ++These attributes indicate ++that the specified function is an interrupt handler. The compiler generates ++function entry and exit sequences suitable for use in an interrupt handler ++when this attribute is present. 
+ +-@smallexample +-namespace nspace1 __attribute__ ((visibility ("protected"))) +-@{ /* @r{Do something.} */; @} +-@end smallexample ++Use @code{brk_interrupt} instead of @code{interrupt} for ++handlers intended to be used with the @code{BRK} opcode (i.e.@: those ++that must end with @code{RETB} instead of @code{RETI}). + +-This attribute applies only to the particular namespace body, not to +-other definitions of the same namespace; it is equivalent to using +-@samp{#pragma GCC visibility} before and after the namespace +-definition (@pxref{Visibility Pragmas}). ++@item naked ++@cindex @code{naked} function attribute, RL78 ++This attribute allows the compiler to construct the ++requisite function declaration, while allowing the body of the ++function to be assembly code. The specified function will not have ++prologue/epilogue sequences generated by the compiler. Only basic ++@code{asm} statements can safely be included in naked functions ++(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of ++basic @code{asm} and C code may appear to work, they cannot be ++depended upon to work reliably and are not supported. ++@end table + +-In C++, if a template argument has limited visibility, this +-restriction is implicitly propagated to the template instantiation. +-Otherwise, template instantiations and specializations default to the +-visibility of their template. ++@node RX Function Attributes ++@subsection RX Function Attributes + +-If both the template and enclosing class have explicit visibility, the +-visibility from the template is used. ++These function attributes are supported by the RX back end: + +-@item vliw +-@cindex @code{vliw} function attribute, MeP +-On MeP, the @code{vliw} attribute tells the compiler to emit +-instructions in VLIW mode instead of core mode. Note that this +-attribute is not allowed unless a VLIW coprocessor has been configured +-and enabled through command-line options. ++@table @code ++@item fast_interrupt ++@cindex @code{fast_interrupt} function attribute, RX ++Use this attribute on the RX port to indicate that the specified ++function is a fast interrupt handler. This is just like the ++@code{interrupt} attribute, except that @code{freit} is used to return ++instead of @code{reit}. + +-@item warn_unused_result +-@cindex @code{warn_unused_result} function attribute +-The @code{warn_unused_result} attribute causes a warning to be emitted +-if a caller of the function with this attribute does not use its +-return value. This is useful for functions where not checking +-the result is either a security problem or always a bug, such as +-@code{realloc}. ++@item interrupt ++@cindex @code{interrupt} function attribute, RX ++Use this attribute to indicate ++that the specified function is an interrupt handler. The compiler generates ++function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. + +On RX targets, you may specify one or more vector numbers as arguments @@ -3384,14 +3541,23 @@ +multiple entries in multiple tables. One may also pass the magic +string @code{"$default"} which causes the function to be used for any +unfilled slots in the current table. 
-+ + +This example shows a simple assignment of a function to one vector in +the default table (note that preprocessor macros may be used for +chip-specific symbolic vector names): -+@smallexample + @smallexample +-int fn () __attribute__ ((warn_unused_result)); +-int foo () +-@{ +- if (fn () < 0) return -1; +- fn (); +- return 0; +-@} +void __attribute__ ((interrupt (5))) txd1_handler (); -+@end smallexample -+ + @end smallexample + +-@noindent +-results in warning on line 5. +This example assigns a function to two slots in the default table +(using preprocessor macros defined elsewhere) and makes it the default +for the @code{dct} table: @@ -3399,7 +3565,15 @@ +void __attribute__ ((interrupt (RXD1_VECT,RXD2_VECT,"dct","$default"))) + txd1_handler (); +@end smallexample -+ + +-@item weak +-@cindex @code{weak} function attribute +-The @code{weak} attribute causes the declaration to be emitted as a weak +-symbol rather than a global. This is primarily useful in defining +-library functions that can be overridden in user code, though it can +-also be used with non-function declarations. Weak symbols are supported +-for ELF targets, and also for a.out targets when using the GNU assembler +-and linker. +@item naked +@cindex @code{naked} function attribute, RX +This attribute allows the compiler to construct the @@ -3410,7 +3584,17 @@ +(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of +basic @code{asm} and C code may appear to work, they cannot be +depended upon to work reliably and are not supported. -+ + +-@item weakref +-@itemx weakref ("@var{target}") +-@cindex @code{weakref} function attribute +-The @code{weakref} attribute marks a declaration as a weak reference. +-Without arguments, it should be accompanied by an @code{alias} attribute +-naming the target symbol. Optionally, the @var{target} may be given as +-an argument to @code{weakref} itself. In either case, @code{weakref} +-implicitly marks the declaration as @code{weak}. Without a +-@var{target}, given as an argument to @code{weakref} or to @code{alias}, +-@code{weakref} is equivalent to @code{weak}. +@item vector +@cindex @code{vector} function attribute, RX +This RX attribute is similar to the @code{interrupt} attribute, including its @@ -3418,16 +3602,36 @@ +function (i.e. it retains the normal C function calling ABI). See the +@code{interrupt} attribute for a description of its arguments. +@end table -+ + +-@smallexample +-static int x() __attribute__ ((weakref ("y"))); +-/* is equivalent to... */ +-static int x() __attribute__ ((weak, weakref, alias ("y"))); +-/* and to... */ +-static int x() __attribute__ ((weakref)); +-static int x() __attribute__ ((alias ("y"))); +-@end smallexample +@node S/390 Function Attributes +@subsection S/390 Function Attributes -+ -+These function attributes are supported on the S/390: -+ -+@table @code + +-A weak reference is an alias that does not by itself require a +-definition to be given for the target symbol. If the target symbol is +-only referenced through weak references, then it becomes a @code{weak} +-undefined symbol. If it is directly referenced, however, then such +-strong references prevail, and a definition is required for the +-symbol, not necessarily in the same translation unit. 
++These function attributes are supported on the S/390: + +-The effect is equivalent to moving all references to the alias to a +-separate translation unit, renaming the alias to the aliased symbol, +-declaring it as weak, compiling the two separate translation units and +-performing a reloadable link on them. ++@table @code +@item hotpatch (@var{halfwords-before-function-label},@var{halfwords-after-function-label}) +@cindex @code{hotpatch} function attribute, S/390 -+ + +-At present, a declaration to which @code{weakref} is attached can +-only be @code{static}. +On S/390 System z targets, you can use this function attribute to +make GCC generate a ``hot-patching'' function prologue. If the +@option{-mhotpatch=} command-line option is used at the same time, @@ -3436,15 +3640,27 @@ +the function label. A second argument can be used to specify the +number of halfwords to be added after the function label. For +both arguments the maximum allowed value is 1000000. -+ + +If both arguments are zero, hotpatching is disabled. -+@end table -+ + @end table + +-You can specify multiple attributes in a declaration by separating them +-by commas within the double parentheses or by immediately following an +-attribute declaration with another attribute declaration. +@node SH Function Attributes +@subsection SH Function Attributes -+ + +-@cindex @code{#pragma}, reason for not using +-@cindex pragma, reason for not using +-Some people object to the @code{__attribute__} feature, suggesting that +-ISO C's @code{#pragma} should be used instead. At the time +-@code{__attribute__} was designed, there were two reasons for not doing +-this. +These function attributes are supported on the SH family of processors: -+ + +-@enumerate +-@item +-It is impossible to generate @code{#pragma} commands from a macro. +@table @code +@item function_vector +@cindex @code{function_vector} function attribute, SH @@ -3459,38 +3675,62 @@ +entries. The jumps to these functions are generated using a SH2A specific, +non delayed branch instruction JSR/N @@(disp8,TBR). You must use GAS and GLD +from GNU binutils version 2.7 or later for this attribute to work correctly. -+ + +-@item +-There is no telling what the same @code{#pragma} might mean in another +-compiler. +-@end enumerate +In an application, for a function being called once, this attribute +saves at least 8 bytes of code; and if other successive calls are being +made to the same function, it saves 2 bytes of code per each of these +calls. -+ + +-These two reasons applied to almost any application that might have been +-proposed for @code{#pragma}. It was basically a mistake to use +-@code{#pragma} for @emph{anything}. +@item interrupt_handler +@cindex @code{interrupt_handler} function attribute, SH +Use this attribute to +indicate that the specified function is an interrupt handler. The compiler +generates function entry and exit sequences suitable for use in an +interrupt handler when this attribute is present. -+ + +-The ISO C99 standard includes @code{_Pragma}, which now allows pragmas +-to be generated from macros. In addition, a @code{#pragma GCC} +-namespace is now in use for GCC-specific pragmas. However, it has been +-found convenient to use @code{__attribute__} to achieve a natural +-attachment of attributes to their corresponding declarations, whereas +-@code{#pragma GCC} is of use for constructs that do not naturally form +-part of the grammar. @xref{Pragmas,,Pragmas Accepted by GCC}. 
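+
+A minimal sketch (the handler name is hypothetical):
+
+@smallexample
+/* Entry and exit sequences suitable for an SH interrupt handler
+   are generated for this function.  */
+void rx_ready_isr (void) __attribute__ ((interrupt_handler));
+@end smallexample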
+@item nosave_low_regs +@cindex @code{nosave_low_regs} function attribute, SH +Use this attribute on SH targets to indicate that an @code{interrupt_handler} +function should not save and restore registers R0..R7. This can be used on SH3* +and SH4* targets that have a second R0..R7 register bank for non-reentrant +interrupt handlers. -+ + +-@node Label Attributes +-@section Label Attributes +-@cindex Label Attributes +@item renesas +@cindex @code{renesas} function attribute, SH +On SH targets this attribute specifies that the function or struct follows the +Renesas ABI. -+ + +-GCC allows attributes to be set on C labels. @xref{Attribute Syntax}, for +-details of the exact syntax for using attributes. Other attributes are +-available for functions (@pxref{Function Attributes}), variables +-(@pxref{Variable Attributes}) and for types (@pxref{Type Attributes}). +@item resbank +@cindex @code{resbank} function attribute, SH +On the SH2A target, this attribute enables the high-speed register +saving and restoration using a register bank for @code{interrupt_handler} +routines. Saving to the bank is performed automatically after the CPU +accepts an interrupt that uses a register bank. -+ + +-This example uses the @code{cold} label attribute to indicate the +-@code{ErrorHandling} branch is unlikely to be taken and that the +-@code{ErrorHandling} label is unused: +The nineteen 32-bit registers comprising general register R0 to R14, +control register GBR, and system registers MACH, MACL, and PR and the +vector table address offset are saved into a register bank. Register @@ -3503,34 +3743,54 @@ +function should switch to an alternate stack. It expects a string +argument that names a global variable holding the address of the +alternate stack. -+ -+@smallexample + + @smallexample +void *alt_stack; +void f () __attribute__ ((interrupt_handler, + sp_switch ("alt_stack"))); +@end smallexample - @item trap_exit - @cindex @code{trap_exit} function attribute, SH -@@ -4329,256 +4662,341 @@ Use this attribute on the SH for an @code{interrupt_handler} to return using - @code{trapa} instead of @code{rte}. This attribute expects an integer - argument specifying the trap number to be used. - --@item trapa_handler --@cindex @code{trapa_handler} function attribute, SH --On SH targets this function attribute is similar to @code{interrupt_handler} --but it does not save and restore all registers. +- asm goto ("some asm" : : : : NoError); ++@item trap_exit ++@cindex @code{trap_exit} function attribute, SH ++Use this attribute on the SH for an @code{interrupt_handler} to return using ++@code{trapa} instead of @code{rte}. This attribute expects an integer ++argument specifying the trap number to be used. + +-/* This branch (the fall-through from the asm) is less commonly used */ +-ErrorHandling: +- __attribute__((cold, unused)); /* Semi-colon is required here */ +- printf("error\n"); +- return 0; +@item trapa_handler +@cindex @code{trapa_handler} function attribute, SH +On SH targets this function attribute is similar to @code{interrupt_handler} +but it does not save and restore all registers. +@end table -+ + +-NoError: +- printf("no error\n"); +- return 1; +-@end smallexample +@node SPU Function Attributes +@subsection SPU Function Attributes -+ + +-@table @code +-@item unused +-@cindex @code{unused} label attribute +-This feature is intended for program-generated code that may contain +-unused labels, but which is compiled with @option{-Wall}. 
It is +-not normally appropriate to use in it human-written code, though it +-could be useful in cases where the code that jumps to the label is +-contained within an @code{#ifdef} conditional. +These function attributes are supported by the SPU back end: -+ + +-@item hot +-@cindex @code{hot} label attribute +-The @code{hot} attribute on a label is used to inform the compiler that +-the path following the label is more likely than paths that are not so +-annotated. This attribute is used in cases where @code{__builtin_expect} +-cannot be used, for instance with computed goto or @code{asm goto}. +@table @code +@item naked +@cindex @code{naked} function attribute, SPU @@ -3543,18 +3803,40 @@ +basic @code{asm} and C code may appear to work, they cannot be +depended upon to work reliably and are not supported. +@end table -+ + +-@item cold +-@cindex @code{cold} label attribute +-The @code{cold} attribute on labels is used to inform the compiler that +-the path following the label is unlikely to be executed. This attribute +-is used in cases where @code{__builtin_expect} cannot be used, for instance +-with computed goto or @code{asm goto}. +@node Symbian OS Function Attributes +@subsection Symbian OS Function Attributes -+ + +-@end table +@xref{Microsoft Windows Function Attributes}, for discussion of the +@code{dllexport} and @code{dllimport} attributes. -+ + +-@node Attribute Syntax +-@section Attribute Syntax +-@cindex attribute syntax +@node Visium Function Attributes +@subsection Visium Function Attributes -+ + +-This section describes the syntax with which @code{__attribute__} may be +-used, and the constructs to which attribute specifiers bind, for the C +-language. Some details may vary for C++ and Objective-C@. Because of +-infelicities in the grammar for attributes, some forms described here +-may not be successfully parsed in all cases. +These function attributes are supported by the Visium back end: -+ + +-There are some problems with the semantics of attributes in C++. For +-example, there are no manglings for attributes, although they may affect +-code generation, so problems may arise when attributed types are used in +-conjunction with templates or overloading. Similarly, @code{typeid} +-does not distinguish between types with different attributes. Support +-for attributes in C++ may be restricted in future to attributes on +-declarations only, but not on nested declarators. +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, Visium @@ -3563,12 +3845,26 @@ +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. +@end table -+ + +-@xref{Function Attributes}, for details of the semantics of attributes +-applying to functions. @xref{Variable Attributes}, for details of the +-semantics of attributes applying to variables. @xref{Type Attributes}, +-for details of the semantics of attributes applying to structure, union +-and enumerated types. +-@xref{Label Attributes}, for details of the semantics of attributes +-applying to labels. +@node x86 Function Attributes +@subsection x86 Function Attributes -+ + +-An @dfn{attribute specifier} is of the form +-@code{__attribute__ ((@var{attribute-list}))}. An @dfn{attribute list} +-is a possibly empty comma-separated sequence of @dfn{attributes}, where +-each attribute is one of the following: +These function attributes are supported by the x86 back end: -+ + +-@itemize @bullet +-@item +-Empty. Empty attributes are ignored. 
+@table @code +@item cdecl +@cindex @code{cdecl} function attribute, x86-32 @@ -3578,7 +3874,10 @@ +assume that the calling function pops off the stack space used to +pass arguments. This is +useful to override the effects of the @option{-mrtd} switch. -+ + +-@item +-A word (which may be an identifier such as @code{unused}, or a reserved +-word such as @code{const}). +@item fastcall +@cindex @code{fastcall} function attribute, x86-32 +@cindex functions that pop the argument stack on x86-32 @@ -3588,7 +3887,10 @@ +and other typed arguments are passed on the stack. The called function +pops the arguments off the stack. If the number of arguments is variable all +arguments are pushed on the stack. -+ + +-@item +-A word, followed by, in parentheses, parameters for the attribute. +-These parameters take one of the following forms: +@item thiscall +@cindex @code{thiscall} function attribute, x86-32 +@cindex functions that pop the argument stack on x86-32 @@ -3601,17 +3903,18 @@ +The @code{thiscall} attribute is intended for C++ non-static member functions. +As a GCC extension, this calling convention can be used for C functions +and for static member methods. -+ + +-@itemize @bullet +-@item +-An identifier. For example, @code{mode} attributes use this form. +@item ms_abi +@itemx sysv_abi +@cindex @code{ms_abi} function attribute, x86 +@cindex @code{sysv_abi} function attribute, x86 --@item unused --@cindex @code{unused} function attribute --This attribute, attached to a function, means that the function is meant --to be possibly unused. GCC does not produce a warning for this --function. +-@item +-An identifier followed by a comma and a non-empty comma-separated list +-of expressions. For example, @code{format} attributes use this form. +On 32-bit and 64-bit x86 targets, you can use an ABI attribute +to indicate which calling convention should be used for a function. The +@code{ms_abi} attribute tells the compiler to use the Microsoft ABI, @@ -3619,62 +3922,63 @@ +used on GNU/Linux and other systems. The default is to use the Microsoft ABI +when targeting Windows. On all other systems, the default is the x86/AMD ABI. --@item used --@cindex @code{used} function attribute --This attribute, attached to a function, means that code must be emitted --for the function even if it appears that the function is not referenced. --This is useful, for example, when the function is referenced only in --inline assembly. +-@item +-A possibly empty comma-separated list of expressions. For example, +-@code{format_arg} attributes use this form with the list being a single +-integer constant expression, and @code{alias} attributes use this form +-with the list being a single string constant. +-@end itemize +-@end itemize +Note, the @code{ms_abi} attribute for Microsoft Windows 64-bit targets currently +requires the @option{-maccumulate-outgoing-args} option. --When applied to a member function of a C++ class template, the --attribute also means that the function is instantiated if the --class itself is instantiated. +-An @dfn{attribute specifier list} is a sequence of one or more attribute +-specifiers, not separated by any other tokens. +@item callee_pop_aggregate_return (@var{number}) +@cindex @code{callee_pop_aggregate_return} function attribute, x86 --@item vector --@cindex @code{vector} function attribute, RX --This RX attribute is similar to the @code{interrupt} attribute, including its --parameters, but does not make the function an interrupt-handler type --function (i.e. 
it retains the normal C function calling ABI). See the --@code{interrupt} attribute for a description of its arguments. +-@subsubheading Label Attributes +On x86-32 targets, you can use this attribute to control how +aggregates are returned in memory. If the caller is responsible for +popping the hidden pointer together with the rest of the arguments, specify +@var{number} equal to zero. If callee is responsible for popping the +hidden pointer, specify @var{number} equal to one. --@item version_id --@cindex @code{version_id} function attribute, IA-64 --This IA-64 HP-UX attribute, attached to a global variable or function, renames a --symbol to contain a version string, thus allowing for function level --versioning. HP-UX system header files may use function level versioning --for some system calls. +-In GNU C, an attribute specifier list may appear after the colon following a +-label, other than a @code{case} or @code{default} label. GNU C++ only permits +-attributes on labels if the attribute specifier is immediately +-followed by a semicolon (i.e., the label applies to an empty +-statement). If the semicolon is missing, C++ label attributes are +-ambiguous, as it is permissible for a declaration, which could begin +-with an attribute list, to be labelled in C++. Declarations cannot be +-labelled in C90 or C99, so the ambiguity does not arise there. +The default x86-32 ABI assumes that the callee pops the +stack for hidden pointer. However, on x86-32 Microsoft Windows targets, +the compiler assumes that the +caller pops the stack for hidden pointer. --@smallexample --extern int foo () __attribute__((version_id ("20040821"))); --@end smallexample +-@subsubheading Type Attributes +@item ms_hook_prologue +@cindex @code{ms_hook_prologue} function attribute, x86 --@noindent --Calls to @var{foo} are mapped to calls to @var{foo@{20040821@}}. +-An attribute specifier list may appear as part of a @code{struct}, +-@code{union} or @code{enum} specifier. It may go either immediately +-after the @code{struct}, @code{union} or @code{enum} keyword, or after +-the closing brace. The former syntax is preferred. +-Where attribute specifiers follow the closing brace, they are considered +-to relate to the structure, union or enumerated type defined, not to any +-enclosing declaration the type specifier appears in, and the type +-defined is not complete until after the attribute specifiers. +-@c Otherwise, there would be the following problems: a shift/reduce +-@c conflict between attributes binding the struct/union/enum and +-@c binding to the list of specifiers/qualifiers; and "aligned" +-@c attributes could use sizeof for the structure, but the size could be +-@c changed later by "packed" attributes. +On 32-bit and 64-bit x86 targets, you can use +this function attribute to make GCC generate the ``hot-patching'' function +prologue used in Win32 API functions in Microsoft Windows XP Service Pack 2 +and newer. --@item visibility ("@var{visibility_type}") --@cindex @code{visibility} function attribute --This attribute affects the linkage of the declaration to which it is attached. --There are four supported @var{visibility_type} values: default, --hidden, protected or internal visibility. +@item regparm (@var{number}) +@cindex @code{regparm} function attribute, x86 +@cindex functions that are passed arguments in registers on x86-32 @@ -3684,11 +3988,7 @@ +take a variable number of arguments continue to be passed all of their +arguments on the stack. 
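+
+A short sketch (the function name is hypothetical):
+
+@smallexample
+/* The two integer arguments are passed in EAX and EDX instead of
+   on the stack.  */
+int __attribute__ ((regparm (2))) add2 (int a, int b);
+@end smallexample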
--@smallexample --void __attribute__ ((visibility ("protected"))) --f () @{ /* @r{Do something.} */; @} --int i __attribute__ ((visibility ("hidden"))); --@end smallexample +-@subsubheading All other attributes +Beware that on some ELF systems this attribute is unsuitable for +global functions in shared libraries with lazy binding (which is the +default). Lazy binding sends the first call via resolving code in @@ -3700,8 +4000,15 @@ +disabled with the linker or the loader if desired, to avoid the +problem.) --The possible values of @var{visibility_type} correspond to the --visibility settings in the ELF gABI. +-Otherwise, an attribute specifier appears as part of a declaration, +-counting declarations of unnamed parameters and type names, and relates +-to that declaration (which may be nested in another declaration, for +-example in the case of a parameter declaration), or to a particular declarator +-within a declaration. Where an +-attribute specifier is applied to a parameter declared as a function or +-an array, it should apply to the function or array rather than the +-pointer to which the parameter is implicitly converted, but this is not +-yet correctly implemented. +@item sseregparm +@cindex @code{sseregparm} function attribute, x86 +On x86-32 targets with SSE support, the @code{sseregparm} attribute @@ -3710,8 +4017,22 @@ +variable number of arguments continue to pass all of their +floating-point arguments on the stack. --@table @dfn --@c keep this list of visibilities in alphabetical order. +-Any list of specifiers and qualifiers at the start of a declaration may +-contain attribute specifiers, whether or not such a list may in that +-context contain storage class specifiers. (Some attributes, however, +-are essentially in the nature of storage class specifiers, and only make +-sense where storage class specifiers may be used; for example, +-@code{section}.) There is one necessary limitation to this syntax: the +-first old-style parameter declaration in a function definition cannot +-begin with an attribute specifier, because such an attribute applies to +-the function instead by syntax described below (which, however, is not +-yet implemented in this case). In some other cases, attribute +-specifiers are permitted by this grammar but not yet supported by the +-compiler. All attribute specifiers in this place relate to the +-declaration as a whole. In the obsolescent usage where a type of +-@code{int} is implied by the absence of type specifiers, such a list of +-specifiers and qualifiers may be an attribute specifier list with no +-other specifiers or qualifiers. +@item force_align_arg_pointer +@cindex @code{force_align_arg_pointer} function attribute, x86 +On x86 targets, the @code{force_align_arg_pointer} attribute may be @@ -3720,10 +4041,14 @@ +This supports mixing legacy codes that run with a 4-byte aligned stack +with modern codes that keep a 16-byte stack for SSE compatibility. --@item default --Default visibility is the normal case for the object file format. --This value is available for the visibility attribute to override other --options that may change the assumed visibility of entities. +-At present, the first parameter in a function prototype must have some +-type specifier that is not an attribute specifier; this resolves an +-ambiguity in the interpretation of @code{void f(int +-(__attribute__((foo)) x))}, but is subject to change. 
At present, if +-the parentheses of a function declarator contain only attributes then +-those attributes are ignored, rather than yielding an error or warning +-or implying a single parameter of type int, but this is subject to +-change. +@item stdcall +@cindex @code{stdcall} function attribute, x86-32 +@cindex functions that pop the argument stack on x86-32 @@ -3731,16 +4056,22 @@ +assume that the called function pops off the stack space used to +pass arguments, unless it takes a variable number of arguments. --On ELF, default visibility means that the declaration is visible to other --modules and, in shared libraries, means that the declared entity may be --overridden. +-An attribute specifier list may appear immediately before a declarator +-(other than the first) in a comma-separated list of declarators in a +-declaration of more than one identifier using a single list of +-specifiers and qualifiers. Such attribute specifiers apply +-only to the identifier before whose declarator they appear. For +-example, in +@item target (@var{options}) +@cindex @code{target} function attribute +As discussed in @ref{Common Function Attributes}, this attribute +allows specification of target-specific compilation options. --On Darwin, default visibility means that the declaration is visible to --other modules. +-@smallexample +-__attribute__((noreturn)) void d0 (void), +- __attribute__((format(printf, 1, 2))) d1 (const char *, ...), +- d2 (void); +-@end smallexample +On the x86, the following options are allowed: +@table @samp +@item abm @@ -3748,244 +4079,250 @@ +@cindex @code{target("abm")} function attribute, x86 +Enable/disable the generation of the advanced bit instructions. --Default visibility corresponds to ``external linkage'' in the language. +-@noindent +-the @code{noreturn} attribute applies to all the functions +-declared; the @code{format} attribute only applies to @code{d1}. +@item aes +@itemx no-aes +@cindex @code{target("aes")} function attribute, x86 +Enable/disable the generation of the AES instructions. --@item hidden --Hidden visibility indicates that the entity declared has a new --form of linkage, which we call ``hidden linkage''. Two --declarations of an object with hidden linkage refer to the same object --if they are in the same shared object. +-An attribute specifier list may appear immediately before the comma, +-@code{=} or semicolon terminating the declaration of an identifier other +-than a function definition. Such attribute specifiers apply +-to the declared object or function. Where an +-assembler name for an object or function is specified (@pxref{Asm +-Labels}), the attribute must follow the @code{asm} +-specification. +@item default +@cindex @code{target("default")} function attribute, x86 +@xref{Function Multiversioning}, where it is used to specify the +default function version. --@item internal --Internal visibility is like hidden visibility, but with additional --processor specific semantics. Unless otherwise specified by the --psABI, GCC defines internal visibility to mean that a function is --@emph{never} called from another module. Compare this with hidden --functions which, while they cannot be referenced directly by other --modules, can be referenced indirectly via function pointers. By --indicating that a function cannot be called from outside the module, --GCC may for instance omit the load of a PIC register since it is known --that the calling function loaded the correct value. 
+-An attribute specifier list may, in future, be permitted to appear after +-the declarator in a function definition (before any old-style parameter +-declarations or the function body). +@item mmx +@itemx no-mmx +@cindex @code{target("mmx")} function attribute, x86 +Enable/disable the generation of the MMX instructions. --@item protected --Protected visibility is like default visibility except that it --indicates that references within the defining module bind to the --definition in that module. That is, the declared entity cannot be --overridden by another module. +-Attribute specifiers may be mixed with type qualifiers appearing inside +-the @code{[]} of a parameter array declarator, in the C99 construct by +-which such qualifiers are applied to the pointer to which the array is +-implicitly converted. Such attribute specifiers apply to the pointer, +-not to the array, but at present this is not implemented and they are +-ignored. +@item pclmul +@itemx no-pclmul +@cindex @code{target("pclmul")} function attribute, x86 +Enable/disable the generation of the PCLMUL instructions. --@end table +-An attribute specifier list may appear at the start of a nested +-declarator. At present, there are some limitations in this usage: the +-attributes correctly apply to the declarator, but for most individual +-attributes the semantics this implies are not implemented. +-When attribute specifiers follow the @code{*} of a pointer +-declarator, they may be mixed with any type qualifiers present. +-The following describes the formal semantics of this syntax. It makes the +-most sense if you are familiar with the formal specification of +-declarators in the ISO C standard. +@item popcnt +@itemx no-popcnt +@cindex @code{target("popcnt")} function attribute, x86 +Enable/disable the generation of the POPCNT instruction. --All visibilities are supported on many, but not all, ELF targets --(supported when the assembler supports the @samp{.visibility} --pseudo-op). Default visibility is supported everywhere. Hidden --visibility is supported on Darwin targets. +-Consider (as in C99 subclause 6.7.5 paragraph 4) a declaration @code{T +-D1}, where @code{T} contains declaration specifiers that specify a type +-@var{Type} (such as @code{int}) and @code{D1} is a declarator that +-contains an identifier @var{ident}. The type specified for @var{ident} +-for derived declarators whose type does not include an attribute +-specifier is as in the ISO C standard. +@item sse +@itemx no-sse +@cindex @code{target("sse")} function attribute, x86 +Enable/disable the generation of the SSE instructions. --The visibility attribute should be applied only to declarations that --would otherwise have external linkage. The attribute should be applied --consistently, so that the same entity should not be declared with --different settings of the attribute. +-If @code{D1} has the form @code{( @var{attribute-specifier-list} D )}, +-and the declaration @code{T D} specifies the type +-``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then +-@code{T D1} specifies the type ``@var{derived-declarator-type-list} +-@var{attribute-specifier-list} @var{Type}'' for @var{ident}. +@item sse2 +@itemx no-sse2 +@cindex @code{target("sse2")} function attribute, x86 +Enable/disable the generation of the SSE2 instructions. --In C++, the visibility attribute applies to types as well as functions --and objects, because in C++ types have linkage. 
A class must not have --greater visibility than its non-static data member types and bases, --and class members default to the visibility of their class. Also, a --declaration without explicit visibility is limited to the visibility --of its type. +-If @code{D1} has the form @code{* +-@var{type-qualifier-and-attribute-specifier-list} D}, and the +-declaration @code{T D} specifies the type +-``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then +-@code{T D1} specifies the type ``@var{derived-declarator-type-list} +-@var{type-qualifier-and-attribute-specifier-list} pointer to @var{Type}'' for +-@var{ident}. +@item sse3 +@itemx no-sse3 +@cindex @code{target("sse3")} function attribute, x86 +Enable/disable the generation of the SSE3 instructions. --In C++, you can mark member functions and static member variables of a --class with the visibility attribute. This is useful if you know a --particular method or static member variable should only be used from --one shared object; then you can mark it hidden while the rest of the --class has default visibility. Care must be taken to avoid breaking --the One Definition Rule; for example, it is usually not useful to mark --an inline method as hidden without marking the whole class as hidden. +-For example, +@item sse4 +@itemx no-sse4 +@cindex @code{target("sse4")} function attribute, x86 +Enable/disable the generation of the SSE4 instructions (both SSE4.1 +and SSE4.2). --A C++ namespace declaration can also have the visibility attribute. +-@smallexample +-void (__attribute__((noreturn)) ****f) (void); +-@end smallexample +@item sse4.1 +@itemx no-sse4.1 +@cindex @code{target("sse4.1")} function attribute, x86 +Enable/disable the generation of the sse4.1 instructions. --@smallexample --namespace nspace1 __attribute__ ((visibility ("protected"))) --@{ /* @r{Do something.} */; @} --@end smallexample +-@noindent +-specifies the type ``pointer to pointer to pointer to pointer to +-non-returning function returning @code{void}''. As another example, +@item sse4.2 +@itemx no-sse4.2 +@cindex @code{target("sse4.2")} function attribute, x86 +Enable/disable the generation of the sse4.2 instructions. --This attribute applies only to the particular namespace body, not to --other definitions of the same namespace; it is equivalent to using --@samp{#pragma GCC visibility} before and after the namespace --definition (@pxref{Visibility Pragmas}). +-@smallexample +-char *__attribute__((aligned(8))) *f; +-@end smallexample +@item sse4a +@itemx no-sse4a +@cindex @code{target("sse4a")} function attribute, x86 +Enable/disable the generation of the SSE4A instructions. --In C++, if a template argument has limited visibility, this --restriction is implicitly propagated to the template instantiation. --Otherwise, template instantiations and specializations default to the --visibility of their template. +-@noindent +-specifies the type ``pointer to 8-byte-aligned pointer to @code{char}''. +-Note again that this does not work with most attributes; for example, +-the usage of @samp{aligned} and @samp{noreturn} attributes given above +-is not yet supported. +@item fma4 +@itemx no-fma4 +@cindex @code{target("fma4")} function attribute, x86 +Enable/disable the generation of the FMA4 instructions. --If both the template and enclosing class have explicit visibility, the --visibility from the template is used. 
+-For compatibility with existing code written for compiler versions that +-did not implement attributes on nested declarators, some laxity is +-allowed in the placing of attributes. If an attribute that only applies +-to types is applied to a declaration, it is treated as applying to +-the type of that declaration. If an attribute that only applies to +-declarations is applied to the type of a declaration, it is treated +-as applying to that declaration; and, for compatibility with code +-placing the attributes immediately before the identifier declared, such +-an attribute applied to a function return type is treated as +-applying to the function type, and such an attribute applied to an array +-element type is treated as applying to the array type. If an +-attribute that only applies to function types is applied to a +-pointer-to-function type, it is treated as applying to the pointer +-target type; if such an attribute is applied to a function return type +-that is not a pointer-to-function type, it is treated as applying +-to the function type. +@item xop +@itemx no-xop +@cindex @code{target("xop")} function attribute, x86 +Enable/disable the generation of the XOP instructions. --@item vliw --@cindex @code{vliw} function attribute, MeP --On MeP, the @code{vliw} attribute tells the compiler to emit --instructions in VLIW mode instead of core mode. Note that this --attribute is not allowed unless a VLIW coprocessor has been configured --and enabled through command-line options. +-@node Function Prototypes +-@section Prototypes and Old-Style Function Definitions +-@cindex function prototype declarations +-@cindex old-style function definitions +-@cindex promotion of formal parameters +@item lwp +@itemx no-lwp +@cindex @code{target("lwp")} function attribute, x86 +Enable/disable the generation of the LWP instructions. --@item warn_unused_result --@cindex @code{warn_unused_result} function attribute --The @code{warn_unused_result} attribute causes a warning to be emitted --if a caller of the function with this attribute does not use its --return value. This is useful for functions where not checking --the result is either a security problem or always a bug, such as --@code{realloc}. +-GNU C extends ISO C to allow a function prototype to override a later +-old-style non-prototype definition. Consider the following example: +@item ssse3 +@itemx no-ssse3 +@cindex @code{target("ssse3")} function attribute, x86 +Enable/disable the generation of the SSSE3 instructions. -@smallexample --int fn () __attribute__ ((warn_unused_result)); --int foo () --@{ -- if (fn () < 0) return -1; -- fn (); -- return 0; --@} --@end smallexample +-/* @r{Use prototypes unless the compiler is old-fashioned.} */ +-#ifdef __STDC__ +-#define P(x) x +-#else +-#define P(x) () +-#endif +@item cld +@itemx no-cld +@cindex @code{target("cld")} function attribute, x86 +Enable/disable the generation of the CLD before string moves. --@noindent --results in warning on line 5. +-/* @r{Prototype function declaration.} */ +-int isroot P((uid_t)); +@item fancy-math-387 +@itemx no-fancy-math-387 +@cindex @code{target("fancy-math-387")} function attribute, x86 +Enable/disable the generation of the @code{sin}, @code{cos}, and +@code{sqrt} instructions on the 387 floating-point unit. --@item weak --@cindex @code{weak} function attribute --The @code{weak} attribute causes the declaration to be emitted as a weak --symbol rather than a global. 
This is primarily useful in defining --library functions that can be overridden in user code, though it can --also be used with non-function declarations. Weak symbols are supported --for ELF targets, and also for a.out targets when using the GNU assembler --and linker. +-/* @r{Old-style function definition.} */ +-int +-isroot (x) /* @r{??? lossage here ???} */ +- uid_t x; +-@{ +- return x == 0; +-@} +-@end smallexample +@item fused-madd +@itemx no-fused-madd +@cindex @code{target("fused-madd")} function attribute, x86 +Enable/disable the generation of the fused multiply/add instructions. --@item weakref --@itemx weakref ("@var{target}") --@cindex @code{weakref} function attribute --The @code{weakref} attribute marks a declaration as a weak reference. --Without arguments, it should be accompanied by an @code{alias} attribute --naming the target symbol. Optionally, the @var{target} may be given as --an argument to @code{weakref} itself. In either case, @code{weakref} --implicitly marks the declaration as @code{weak}. Without a --@var{target}, given as an argument to @code{weakref} or to @code{alias}, --@code{weakref} is equivalent to @code{weak}. +-Suppose the type @code{uid_t} happens to be @code{short}. ISO C does +-not allow this example, because subword arguments in old-style +-non-prototype definitions are promoted. Therefore in this example the +-function definition's argument is really an @code{int}, which does not +-match the prototype argument type of @code{short}. +@item ieee-fp +@itemx no-ieee-fp +@cindex @code{target("ieee-fp")} function attribute, x86 +Enable/disable the generation of floating point that depends on IEEE arithmetic. --@smallexample --static int x() __attribute__ ((weakref ("y"))); --/* is equivalent to... */ --static int x() __attribute__ ((weak, weakref, alias ("y"))); --/* and to... */ --static int x() __attribute__ ((weakref)); --static int x() __attribute__ ((alias ("y"))); --@end smallexample +-This restriction of ISO C makes it hard to write code that is portable +-to traditional C compilers, because the programmer does not know +-whether the @code{uid_t} type is @code{short}, @code{int}, or +-@code{long}. Therefore, in cases like these GNU C allows a prototype +-to override a later old-style definition. More precisely, in GNU C, a +-function prototype argument type overrides the argument type specified +-by a later old-style definition if the former type is the same as the +-latter type before promotion. Thus in GNU C the above example is +-equivalent to the following: +@item inline-all-stringops +@itemx no-inline-all-stringops +@cindex @code{target("inline-all-stringops")} function attribute, x86 +Enable/disable inlining of string operations. --A weak reference is an alias that does not by itself require a --definition to be given for the target symbol. If the target symbol is --only referenced through weak references, then it becomes a @code{weak} --undefined symbol. If it is directly referenced, however, then such --strong references prevail, and a definition is required for the --symbol, not necessarily in the same translation unit. +-@smallexample +-int isroot (uid_t); +@item inline-stringops-dynamically +@itemx no-inline-stringops-dynamically +@cindex @code{target("inline-stringops-dynamically")} function attribute, x86 +Enable/disable the generation of the inline code to do small string +operations and calling the library routines for large operations. 
--The effect is equivalent to moving all references to the alias to a --separate translation unit, renaming the alias to the aliased symbol, --declaring it as weak, compiling the two separate translation units and --performing a reloadable link on them. +-int +-isroot (uid_t x) +-@{ +- return x == 0; +-@} +-@end smallexample +@item align-stringops +@itemx no-align-stringops +@cindex @code{target("align-stringops")} function attribute, x86 +Do/do not align destination of inlined string operations. --At present, a declaration to which @code{weakref} is attached can --only be @code{static}. +-@noindent +-GNU C++ does not support old-style function definitions, so this +-extension is irrelevant. +@item recip +@itemx no-recip +@cindex @code{target("recip")} function attribute, x86 @@ -3993,24 +4330,30 @@ +instructions followed an additional Newton-Raphson step instead of +doing a floating-point division. --@end table +-@node C++ Comments +-@section C++ Style Comments +-@cindex @code{//} +-@cindex C++ comments +-@cindex comments, C++ style +@item arch=@var{ARCH} +@cindex @code{target("arch=@var{ARCH}")} function attribute, x86 +Specify the architecture to generate code for in compiling the function. --You can specify multiple attributes in a declaration by separating them --by commas within the double parentheses or by immediately following an --attribute declaration with another attribute declaration. +-In GNU C, you may use C++ style comments, which start with @samp{//} and +-continue until the end of the line. Many other C implementations allow +-such comments, and they are included in the 1999 C standard. However, +-C++ style comments are not recognized if you specify an @option{-std} +-option specifying a version of ISO C before C99, or @option{-ansi} +-(equivalent to @option{-std=c90}). +@item tune=@var{TUNE} +@cindex @code{target("tune=@var{TUNE}")} function attribute, x86 +Specify the architecture to tune for in compiling the function. --@cindex @code{#pragma}, reason for not using --@cindex pragma, reason for not using --Some people object to the @code{__attribute__} feature, suggesting that --ISO C's @code{#pragma} should be used instead. At the time --@code{__attribute__} was designed, there were two reasons for not doing --this. +-@node Dollar Signs +-@section Dollar Signs in Identifier Names +-@cindex $ +-@cindex dollar signs in identifier names +-@cindex identifier names, dollar signs in +@item fpmath=@var{FPMATH} +@cindex @code{target("fpmath=@var{FPMATH}")} function attribute, x86 +Specify which floating-point unit to use. You must specify the @@ -4019,9 +4362,10 @@ +different options. +@end table --@enumerate --@item --It is impossible to generate @code{#pragma} commands from a macro. +-In GNU C, you may normally use dollar signs in identifier names. +-This is because many traditional C implementations allow such identifiers. +-However, dollar signs in identifiers are not supported on a few target +-machines, typically because the target assembler does not allow them. +On the x86, the inliner does not inline a +function that has different target options than the caller, unless the +callee has a subset of the target options of the caller. For example @@ -4029,25 +4373,15 @@ +with @code{target("sse2")}, since @code{-msse3} implies @code{-msse2}. +@end table --@item --There is no telling what the same @code{#pragma} might mean in another --compiler. 
--@end enumerate +-@node Character Escapes +-@section The Character @key{ESC} in Constants +@node Xstormy16 Function Attributes +@subsection Xstormy16 Function Attributes --These two reasons applied to almost any application that might have been --proposed for @code{#pragma}. It was basically a mistake to use --@code{#pragma} for @emph{anything}. +-You can use the sequence @samp{\e} in a string or character constant to +-stand for the ASCII character @key{ESC}. +These function attributes are supported by the Xstormy16 back end: - --The ISO C99 standard includes @code{_Pragma}, which now allows pragmas --to be generated from macros. In addition, a @code{#pragma GCC} --namespace is now in use for GCC-specific pragmas. However, it has been --found convenient to use @code{__attribute__} to achieve a natural --attachment of attributes to their corresponding declarations, whereas --@code{#pragma GCC} is of use for constructs that do not naturally form --part of the grammar. @xref{Pragmas,,Pragmas Accepted by GCC}. ++ +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, Xstormy16 @@ -4057,39 +4391,9 @@ +when this attribute is present. +@end table - @node Label Attributes - @section Label Attributes -@@ -4669,11 +5087,13 @@ each attribute is one of the following: - Empty. Empty attributes are ignored. - - @item --A word (which may be an identifier such as @code{unused}, or a reserved -+An attribute name -+(which may be an identifier such as @code{unused}, or a reserved - word such as @code{const}). - - @item --A word, followed by, in parentheses, parameters for the attribute. -+An attribute name followed by a parenthesized list of -+parameters for the attribute. - These parameters take one of the following forms: - - @itemize @bullet -@@ -4695,6 +5115,13 @@ with the list being a single string constant. - An @dfn{attribute specifier list} is a sequence of one or more attribute - specifiers, not separated by any other tokens. - -+You may optionally specify attribute names with @samp{__} -+preceding and following the name. -+This allows you to use them in header files without -+being concerned about a possible macro of the same name. For example, -+you may use the attribute name @code{__noreturn__} instead of @code{noreturn}. -+ -+ - @subsubheading Label Attributes - - In GNU C, an attribute specifier list may appear after the colon following a -@@ -4970,14 +5397,29 @@ types (@pxref{Type Attributes}). + @node Variable Attributes + @section Specifying Attributes of Variables +@@ -4970,14 +5036,29 @@ types (@pxref{Type Attributes}). Other front ends might define more attributes (@pxref{C++ Extensions,,Extensions to the C++ Language}). @@ -4124,7 +4428,7 @@ @table @code @cindex @code{aligned} variable attribute @item aligned (@var{alignment}) -@@ -5046,7 +5488,7 @@ in an @code{__attribute__} still only provides you with 8-byte +@@ -5046,7 +5127,7 @@ in an @code{__attribute__} still only provides you with 8-byte alignment. See your linker documentation for further information. The @code{aligned} attribute can also be used for functions @@ -4133,7 +4437,7 @@ @item cleanup (@var{cleanup_function}) @cindex @code{cleanup} variable attribute -@@ -5100,7 +5542,8 @@ argument, which must be a string, is printed in the warning if +@@ -5100,7 +5181,8 @@ argument, which must be a string, is printed in the warning if present. 
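+
+A brief sketch (the variable name and message are hypothetical):
+
+@smallexample
+extern int old_rate __attribute__ ((deprecated ("use new_rate instead")));
+
+int
+f (void)
+@{
+  return old_rate;   /* Warns and prints the message.  */
+@}
+@end smallexample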
The @code{deprecated} attribute can also be used for functions and @@ -4143,10 +4447,28 @@ @item mode (@var{mode}) @cindex @code{mode} variable attribute -@@ -5186,33 +5629,6 @@ attribute is not available on all platforms. - If you need to map the entire contents of a module to a particular - section, consider using the facilities of the linker instead. +@@ -5168,50 +5250,23 @@ main() + @noindent + Use the @code{section} attribute with +-@emph{global} variables and not @emph{local} variables, +-as shown in the example. +- +-You may use the @code{section} attribute with initialized or +-uninitialized global variables but the linker requires +-each object be defined once, with the exception that uninitialized +-variables tentatively go in the @code{common} (or @code{bss}) section +-and can be multiply ``defined''. Using the @code{section} attribute +-changes what section the variable goes into and may cause the +-linker to issue an error if an uninitialized variable has multiple +-definitions. You can force a variable to be initialized with the +-@option{-fno-common} flag or the @code{nocommon} attribute. +- +-Some file formats do not support arbitrary sections so the @code{section} +-attribute is not available on all platforms. +-If you need to map the entire contents of a module to a particular +-section, consider using the facilities of the linker instead. +- -@item shared -@cindex @code{shared} variable attribute -On Microsoft Windows, in addition to putting variable definitions in a named @@ -4171,267 +4493,1447 @@ -You may only use the @code{shared} attribute along with @code{section} -attribute with a fully-initialized global definition because of the way -linkers work. See @code{section} attribute for more information. -- ++@emph{global} variables and not @emph{local} variables, ++as shown in the example. + -The @code{shared} attribute is only available on Microsoft Windows@. -- ++You may use the @code{section} attribute with initialized or ++uninitialized global variables but the linker requires ++each object be defined once, with the exception that uninitialized ++variables tentatively go in the @code{common} (or @code{bss}) section ++and can be multiply ``defined''. Using the @code{section} attribute ++changes what section the variable goes into and may cause the ++linker to issue an error if an uninitialized variable has multiple ++definitions. You can force a variable to be initialized with the ++@option{-fno-common} flag or the @code{nocommon} attribute. ++ ++Some file formats do not support arbitrary sections so the @code{section} ++attribute is not available on all platforms. ++If you need to map the entire contents of a module to a particular ++section, consider using the facilities of the linker instead. + @item tls_model ("@var{tls_model}") @cindex @code{tls_model} variable attribute - The @code{tls_model} attribute sets thread-local storage model -@@ -5270,42 +5686,14 @@ struct S __attribute__ ((vector_size (16))) foo; - is invalid even if the size of the structure is the same as the size of - the @code{int}. - --@item selectany --@cindex @code{selectany} variable attribute --The @code{selectany} attribute causes an initialized global variable to --have link-once semantics. When multiple definitions of the variable are --encountered by the linker, the first is selected and the remainder are --discarded. Following usage by the Microsoft compiler, the linker is told --@emph{not} to warn about size or content differences of the multiple --definitions. 
-- --Although the primary usage of this attribute is for POD types, the --attribute can also be applied to global C++ objects that are initialized --by a constructor. In this case, the static initialization and destruction --code for the object is emitted in each translation defining the object, --but the calls to the constructor and destructor are protected by a --link-once guard variable. -- --The @code{selectany} attribute is only available on Microsoft Windows --targets. You can use @code{__declspec (selectany)} as a synonym for --@code{__attribute__ ((selectany))} for compatibility with other --compilers. -- - @item weak - @cindex @code{weak} variable attribute +@@ -5258,17 +5313,280 @@ This attribute is only applicable to integral and float scalars, + although arrays, pointers, and function return values are allowed in + conjunction with this construct. + +-Aggregates with this attribute are invalid, even if they are of the same +-size as a corresponding scalar. For example, the declaration: ++Aggregates with this attribute are invalid, even if they are of the same ++size as a corresponding scalar. For example, the declaration: ++ ++@smallexample ++struct S @{ int a; @}; ++struct S __attribute__ ((vector_size (16))) foo; ++@end smallexample ++ ++@noindent ++is invalid even if the size of the structure is the same as the size of ++the @code{int}. ++ ++@item weak ++@cindex @code{weak} variable attribute ++The @code{weak} attribute is described in ++@ref{Common Function Attributes}. ++ ++@end table ++ ++@node AVR Variable Attributes ++@subsection AVR Variable Attributes ++ ++@table @code ++@item progmem ++@cindex @code{progmem} variable attribute, AVR ++The @code{progmem} attribute is used on the AVR to place read-only ++data in the non-volatile program memory (flash). The @code{progmem} ++attribute accomplishes this by putting respective variables into a ++section whose name starts with @code{.progmem}. ++ ++This attribute works similar to the @code{section} attribute ++but adds additional checking. Notice that just like the ++@code{section} attribute, @code{progmem} affects the location ++of the data but not how this data is accessed. ++ ++In order to read data located with the @code{progmem} attribute ++(inline) assembler must be used. ++@smallexample ++/* Use custom macros from @w{@uref{http://nongnu.org/avr-libc/user-manual/,AVR-LibC}} */ ++#include ++ ++/* Locate var in flash memory */ ++const int var[2] PROGMEM = @{ 1, 2 @}; ++ ++int read_var (int i) ++@{ ++ /* Access var[] by accessor macro from avr/pgmspace.h */ ++ return (int) pgm_read_word (& var[i]); ++@} ++@end smallexample ++ ++AVR is a Harvard architecture processor and data and read-only data ++normally resides in the data memory (RAM). ++ ++See also the @ref{AVR Named Address Spaces} section for ++an alternate way to locate and access data in flash memory. ++ ++@item io ++@itemx io (@var{addr}) ++@cindex @code{io} variable attribute, AVR ++Variables with the @code{io} attribute are used to address ++memory-mapped peripherals in the io address range. ++If an address is specified, the variable ++is assigned that address, and the value is interpreted as an ++address in the data address space. ++Example: ++ ++@smallexample ++volatile int porta __attribute__((io (0x22))); ++@end smallexample ++ ++The address specified in the address in the data address range. 
++ ++Otherwise, the variable it is not assigned an address, but the ++compiler will still use in/out instructions where applicable, ++assuming some other module assigns an address in the io address range. ++Example: ++ ++@smallexample ++extern volatile int porta __attribute__((io)); ++@end smallexample ++ ++@item io_low ++@itemx io_low (@var{addr}) ++@cindex @code{io_low} variable attribute, AVR ++This is like the @code{io} attribute, but additionally it informs the ++compiler that the object lies in the lower half of the I/O area, ++allowing the use of @code{cbi}, @code{sbi}, @code{sbic} and @code{sbis} ++instructions. ++ ++@item address ++@itemx address (@var{addr}) ++@cindex @code{address} variable attribute, AVR ++Variables with the @code{address} attribute are used to address ++memory-mapped peripherals that may lie outside the io address range. ++ ++@smallexample ++volatile int porta __attribute__((address (0x600))); ++@end smallexample ++ ++@end table ++ ++@node Blackfin Variable Attributes ++@subsection Blackfin Variable Attributes ++ ++Three attributes are currently defined for the Blackfin. ++ ++@table @code ++@item l1_data ++@itemx l1_data_A ++@itemx l1_data_B ++@cindex @code{l1_data} variable attribute, Blackfin ++@cindex @code{l1_data_A} variable attribute, Blackfin ++@cindex @code{l1_data_B} variable attribute, Blackfin ++Use these attributes on the Blackfin to place the variable into L1 Data SRAM. ++Variables with @code{l1_data} attribute are put into the specific section ++named @code{.l1.data}. Those with @code{l1_data_A} attribute are put into ++the specific section named @code{.l1.data.A}. Those with @code{l1_data_B} ++attribute are put into the specific section named @code{.l1.data.B}. ++ ++@item l2 ++@cindex @code{l2} variable attribute, Blackfin ++Use this attribute on the Blackfin to place the variable into L2 SRAM. ++Variables with @code{l2} attribute are put into the specific section ++named @code{.l2.data}. ++@end table ++ ++@node H8/300 Variable Attributes ++@subsection H8/300 Variable Attributes ++ ++These variable attributes are available for H8/300 targets: ++ ++@table @code ++@item eightbit_data ++@cindex @code{eightbit_data} variable attribute, H8/300 ++@cindex eight-bit data on the H8/300, H8/300H, and H8S ++Use this attribute on the H8/300, H8/300H, and H8S to indicate that the specified ++variable should be placed into the eight-bit data section. ++The compiler generates more efficient code for certain operations ++on data in the eight-bit data area. Note the eight-bit data area is limited to ++256 bytes of data. ++ ++You must use GAS and GLD from GNU binutils version 2.7 or later for ++this attribute to work correctly. ++ ++@item tiny_data ++@cindex @code{tiny_data} variable attribute, H8/300 ++@cindex tiny data section on the H8/300H and H8S ++Use this attribute on the H8/300H and H8S to indicate that the specified ++variable should be placed into the tiny data section. ++The compiler generates more efficient code for loads and stores ++on data in the tiny data section. Note the tiny data area is limited to ++slightly under 32KB of data. ++ ++@end table ++ ++@node IA-64 Variable Attributes ++@subsection IA-64 Variable Attributes ++ ++The IA-64 back end supports the following variable attribute: ++ ++@table @code ++@item model (@var{model-name}) ++@cindex @code{model} variable attribute, IA-64 ++ ++On IA-64, use this attribute to set the addressability of an object. 
++At present, the only supported identifier for @var{model-name} is ++@code{small}, indicating addressability via ``small'' (22-bit) ++addresses (so that their addresses can be loaded with the @code{addl} ++instruction). Caveat: such addressing is by definition not position ++independent and hence this attribute must not be used for objects ++defined by shared libraries. ++ ++@end table ++ ++@node M32R/D Variable Attributes ++@subsection M32R/D Variable Attributes ++ ++One attribute is currently defined for the M32R/D@. ++ ++@table @code ++@item model (@var{model-name}) ++@cindex @code{model-name} variable attribute, M32R/D ++@cindex variable addressability on the M32R/D ++Use this attribute on the M32R/D to set the addressability of an object. ++The identifier @var{model-name} is one of @code{small}, @code{medium}, ++or @code{large}, representing each of the code models. ++ ++Small model objects live in the lower 16MB of memory (so that their ++addresses can be loaded with the @code{ld24} instruction). ++ ++Medium and large model objects may live anywhere in the 32-bit address space ++(the compiler generates @code{seth/add3} instructions to load their ++addresses). ++@end table ++ ++@node MeP Variable Attributes ++@subsection MeP Variable Attributes ++ ++The MeP target has a number of addressing modes and busses. The ++@code{near} space spans the standard memory space's first 16 megabytes ++(24 bits). The @code{far} space spans the entire 32-bit memory space. ++The @code{based} space is a 128-byte region in the memory space that ++is addressed relative to the @code{$tp} register. The @code{tiny} ++space is a 65536-byte region relative to the @code{$gp} register. In ++addition to these memory regions, the MeP target has a separate 16-bit ++control bus which is specified with @code{cb} attributes. ++ ++@table @code ++ ++@item based ++@cindex @code{based} variable attribute, MeP ++Any variable with the @code{based} attribute is assigned to the ++@code{.based} section, and is accessed with relative to the ++@code{$tp} register. ++ ++@item tiny ++@cindex @code{tiny} variable attribute, MeP ++Likewise, the @code{tiny} attribute assigned variables to the ++@code{.tiny} section, relative to the @code{$gp} register. ++ ++@item near ++@cindex @code{near} variable attribute, MeP ++Variables with the @code{near} attribute are assumed to have addresses ++that fit in a 24-bit addressing mode. This is the default for large ++variables (@code{-mtiny=4} is the default) but this attribute can ++override @code{-mtiny=} for small variables, or override @code{-ml}. ++ ++@item far ++@cindex @code{far} variable attribute, MeP ++Variables with the @code{far} attribute are addressed using a full ++32-bit address. Since this covers the entire memory space, this ++allows modules to make no assumptions about where variables might be ++stored. ++ ++@item io ++@cindex @code{io} variable attribute, MeP ++@itemx io (@var{addr}) ++Variables with the @code{io} attribute are used to address ++memory-mapped peripherals. If an address is specified, the variable ++is assigned that address, else it is not assigned an address (it is ++assumed some other module assigns an address). Example: ++ ++@smallexample ++int timer_count __attribute__((io(0x123))); ++@end smallexample ++ ++@item cb ++@itemx cb (@var{addr}) ++@cindex @code{cb} variable attribute, MeP ++Variables with the @code{cb} attribute are used to access the control ++bus, using special instructions. @code{addr} indicates the control bus ++address. 
Example: + + @smallexample +-struct S @{ int a; @}; +-struct S __attribute__ ((vector_size (16))) foo; ++int cpu_clock __attribute__((cb(0x123))); + @end smallexample + +-@noindent +-is invalid even if the size of the structure is the same as the size of +-the @code{int}. ++@end table ++ ++@node Microsoft Windows Variable Attributes ++@subsection Microsoft Windows Variable Attributes ++ ++You can use these attributes on Microsoft Windows targets. ++@ref{x86 Variable Attributes} for additional Windows compatibility ++attributes available on all x86 targets. ++ ++@table @code ++@item dllimport ++@itemx dllexport ++@cindex @code{dllimport} variable attribute ++@cindex @code{dllexport} variable attribute ++The @code{dllimport} and @code{dllexport} attributes are described in ++@ref{Microsoft Windows Function Attributes}. + + @item selectany + @cindex @code{selectany} variable attribute +@@ -5291,891 +5609,1062 @@ targets. You can use @code{__declspec (selectany)} as a synonym for + @code{__attribute__ ((selectany))} for compatibility with other + compilers. + +-@item weak +-@cindex @code{weak} variable attribute -The @code{weak} attribute is described in @ref{Function Attributes}. -- ++@item shared ++@cindex @code{shared} variable attribute ++On Microsoft Windows, in addition to putting variable definitions in a named ++section, the section can also be shared among all running copies of an ++executable or DLL@. For example, this small program defines shared data ++by putting it in a named section @code{shared} and marking the section ++shareable: + -@item dllimport -@cindex @code{dllimport} variable attribute -The @code{dllimport} attribute is described in @ref{Function Attributes}. -- ++@smallexample ++int foo __attribute__((section ("shared"), shared)) = 0; + -@item dllexport -@cindex @code{dllexport} variable attribute -The @code{dllexport} attribute is described in @ref{Function Attributes}. -+The @code{weak} attribute is described in -+@ref{Common Function Attributes}. ++int ++main() ++@{ ++ /* @r{Read and write foo. All running ++ copies see the same value.} */ ++ return 0; ++@} ++@end smallexample ++ ++@noindent ++You may only use the @code{shared} attribute along with @code{section} ++attribute with a fully-initialized global definition because of the way ++linkers work. See @code{section} attribute for more information. ++ ++The @code{shared} attribute is only available on Microsoft Windows@. + + @end table + +-@anchor{AVR Variable Attributes} +-@subsection AVR Variable Attributes ++@node PowerPC Variable Attributes ++@subsection PowerPC Variable Attributes + +-@table @code +-@item progmem +-@cindex @code{progmem} variable attribute, AVR +-The @code{progmem} attribute is used on the AVR to place read-only +-data in the non-volatile program memory (flash). The @code{progmem} +-attribute accomplishes this by putting respective variables into a +-section whose name starts with @code{.progmem}. ++Three attributes currently are defined for PowerPC configurations: ++@code{altivec}, @code{ms_struct} and @code{gcc_struct}. + +-This attribute works similar to the @code{section} attribute +-but adds additional checking. Notice that just like the +-@code{section} attribute, @code{progmem} affects the location +-of the data but not how this data is accessed. ++@cindex @code{ms_struct} variable attribute, PowerPC ++@cindex @code{gcc_struct} variable attribute, PowerPC ++For full documentation of the struct attributes please see the ++documentation in @ref{x86 Variable Attributes}. 
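As a rough sketch (the tag name @code{msvc_layout} is illustrative, and the
exact layout rules are the ones documented for the x86 attributes), a
structure can be asked to follow the Microsoft layout rules by attaching
@code{ms_struct} to its definition, while @code{gcc_struct} selects the
default GCC layout again:

@smallexample
struct __attribute__ ((ms_struct)) msvc_layout
@{
  char c;
  int i : 4; /* bit-field packing is where the two layouts differ */
@};
@end smallexample
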
+ +-In order to read data located with the @code{progmem} attribute +-(inline) assembler must be used. +-@smallexample +-/* Use custom macros from @w{@uref{http://nongnu.org/avr-libc/user-manual/,AVR-LibC}} */ +-#include ++@cindex @code{altivec} variable attribute, PowerPC ++For documentation of @code{altivec} attribute please see the ++documentation in @ref{PowerPC Type Attributes}. + +-/* Locate var in flash memory */ +-const int var[2] PROGMEM = @{ 1, 2 @}; ++@node SPU Variable Attributes ++@subsection SPU Variable Attributes + +-int read_var (int i) +-@{ +- /* Access var[] by accessor macro from avr/pgmspace.h */ +- return (int) pgm_read_word (& var[i]); +-@} +-@end smallexample ++@cindex @code{spu_vector} variable attribute, SPU ++The SPU supports the @code{spu_vector} attribute for variables. For ++documentation of this attribute please see the documentation in ++@ref{SPU Type Attributes}. + +-AVR is a Harvard architecture processor and data and read-only data +-normally resides in the data memory (RAM). ++@node x86 Variable Attributes ++@subsection x86 Variable Attributes + +-See also the @ref{AVR Named Address Spaces} section for +-an alternate way to locate and access data in flash memory. ++Two attributes are currently defined for x86 configurations: ++@code{ms_struct} and @code{gcc_struct}. + +-@item io +-@itemx io (@var{addr}) +-@cindex @code{io} variable attribute, AVR +-Variables with the @code{io} attribute are used to address +-memory-mapped peripherals in the io address range. +-If an address is specified, the variable +-is assigned that address, and the value is interpreted as an +-address in the data address space. +-Example: ++@table @code ++@item ms_struct ++@itemx gcc_struct ++@cindex @code{ms_struct} variable attribute, x86 ++@cindex @code{gcc_struct} variable attribute, x86 ++ ++If @code{packed} is used on a structure, or if bit-fields are used, ++it may be that the Microsoft ABI lays out the structure differently ++than the way GCC normally does. Particularly when moving packed ++data between functions compiled with GCC and the native Microsoft compiler ++(either via function call or as data in a file), it may be necessary to access ++either format. ++ ++Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 ++compilers to match the native Microsoft compiler. ++ ++The Microsoft structure layout algorithm is fairly simple with the exception ++of the bit-field packing. ++The padding and alignment of members of structures and whether a bit-field ++can straddle a storage-unit boundary are determine by these rules: ++ ++@enumerate ++@item Structure members are stored sequentially in the order in which they are ++declared: the first member has the lowest memory address and the last member ++the highest. ++ ++@item Every data object has an alignment requirement. The alignment requirement ++for all data except structures, unions, and arrays is either the size of the ++object or the current packing size (specified with either the ++@code{aligned} attribute or the @code{pack} pragma), ++whichever is less. For structures, unions, and arrays, ++the alignment requirement is the largest alignment requirement of its members. ++Every object is allocated an offset so that: + + @smallexample +-volatile int porta __attribute__((io (0x22))); ++offset % alignment_requirement == 0 + @end smallexample + +-The address specified in the address in the data address range. 
++@item Adjacent bit-fields are packed into the same 1-, 2-, or 4-byte allocation ++unit if the integral types are the same size and if the next bit-field fits ++into the current allocation unit without crossing the boundary imposed by the ++common alignment requirements of the bit-fields. ++@end enumerate + +-Otherwise, the variable it is not assigned an address, but the +-compiler will still use in/out instructions where applicable, +-assuming some other module assigns an address in the io address range. +-Example: ++MSVC interprets zero-length bit-fields in the following ways: ++ ++@enumerate ++@item If a zero-length bit-field is inserted between two bit-fields that ++are normally coalesced, the bit-fields are not coalesced. ++ ++For example: + + @smallexample +-extern volatile int porta __attribute__((io)); ++struct ++ @{ ++ unsigned long bf_1 : 12; ++ unsigned long : 0; ++ unsigned long bf_2 : 12; ++ @} t1; + @end smallexample + +-@item io_low +-@itemx io_low (@var{addr}) +-@cindex @code{io_low} variable attribute, AVR +-This is like the @code{io} attribute, but additionally it informs the +-compiler that the object lies in the lower half of the I/O area, +-allowing the use of @code{cbi}, @code{sbi}, @code{sbic} and @code{sbis} +-instructions. ++@noindent ++The size of @code{t1} is 8 bytes with the zero-length bit-field. If the ++zero-length bit-field were removed, @code{t1}'s size would be 4 bytes. + +-@item address +-@itemx address (@var{addr}) +-@cindex @code{address} variable attribute, AVR +-Variables with the @code{address} attribute are used to address +-memory-mapped peripherals that may lie outside the io address range. ++@item If a zero-length bit-field is inserted after a bit-field, @code{foo}, and the ++alignment of the zero-length bit-field is greater than the member that follows it, ++@code{bar}, @code{bar} is aligned as the type of the zero-length bit-field. ++ ++For example: + + @smallexample +-volatile int porta __attribute__((address (0x600))); ++struct ++ @{ ++ char foo : 4; ++ short : 0; ++ char bar; ++ @} t2; ++ ++struct ++ @{ ++ char foo : 4; ++ short : 0; ++ double bar; ++ @} t3; + @end smallexample + +-@end table ++@noindent ++For @code{t2}, @code{bar} is placed at offset 2, rather than offset 1. ++Accordingly, the size of @code{t2} is 4. For @code{t3}, the zero-length ++bit-field does not affect the alignment of @code{bar} or, as a result, the size ++of the structure. + +-@subsection Blackfin Variable Attributes ++Taking this into account, it is important to note the following: + +-Three attributes are currently defined for the Blackfin. ++@enumerate ++@item If a zero-length bit-field follows a normal bit-field, the type of the ++zero-length bit-field may affect the alignment of the structure as whole. For ++example, @code{t2} has a size of 4 bytes, since the zero-length bit-field follows a ++normal bit-field, and is of type short. + +-@table @code +-@item l1_data +-@itemx l1_data_A +-@itemx l1_data_B +-@cindex @code{l1_data} variable attribute, Blackfin +-@cindex @code{l1_data_A} variable attribute, Blackfin +-@cindex @code{l1_data_B} variable attribute, Blackfin +-Use these attributes on the Blackfin to place the variable into L1 Data SRAM. +-Variables with @code{l1_data} attribute are put into the specific section +-named @code{.l1.data}. Those with @code{l1_data_A} attribute are put into +-the specific section named @code{.l1.data.A}. Those with @code{l1_data_B} +-attribute are put into the specific section named @code{.l1.data.B}. 
++@item Even if a zero-length bit-field is not followed by a normal bit-field, it may ++still affect the alignment of the structure: + +-@item l2 +-@cindex @code{l2} variable attribute, Blackfin +-Use this attribute on the Blackfin to place the variable into L2 SRAM. +-Variables with @code{l2} attribute are put into the specific section +-named @code{.l2.data}. ++@smallexample ++struct ++ @{ ++ char foo : 6; ++ long : 0; ++ @} t4; ++@end smallexample ++ ++@noindent ++Here, @code{t4} takes up 4 bytes. ++@end enumerate ++ ++@item Zero-length bit-fields following non-bit-field members are ignored: ++ ++@smallexample ++struct ++ @{ ++ char foo; ++ long : 0; ++ char bar; ++ @} t5; ++@end smallexample ++ ++@noindent ++Here, @code{t5} takes up 2 bytes. ++@end enumerate + @end table + +-@subsection H8/300 Variable Attributes ++@node Xstormy16 Variable Attributes ++@subsection Xstormy16 Variable Attributes + +-These variable attributes are available for H8/300 targets: ++One attribute is currently defined for xstormy16 configurations: ++@code{below100}. + + @table @code +-@item eightbit_data +-@cindex @code{eightbit_data} variable attribute, H8/300 +-@cindex eight-bit data on the H8/300, H8/300H, and H8S +-Use this attribute on the H8/300, H8/300H, and H8S to indicate that the specified +-variable should be placed into the eight-bit data section. +-The compiler generates more efficient code for certain operations +-on data in the eight-bit data area. Note the eight-bit data area is limited to +-256 bytes of data. +- +-You must use GAS and GLD from GNU binutils version 2.7 or later for +-this attribute to work correctly. ++@item below100 ++@cindex @code{below100} variable attribute, Xstormy16 + +-@item tiny_data +-@cindex @code{tiny_data} variable attribute, H8/300 +-@cindex tiny data section on the H8/300H and H8S +-Use this attribute on the H8/300H and H8S to indicate that the specified +-variable should be placed into the tiny data section. +-The compiler generates more efficient code for loads and stores +-on data in the tiny data section. Note the tiny data area is limited to +-slightly under 32KB of data. ++If a variable has the @code{below100} attribute (@code{BELOW100} is ++allowed also), GCC places the variable in the first 0x100 bytes of ++memory and use special opcodes to access it. Such variables are ++placed in either the @code{.bss_below100} section or the ++@code{.data_below100} section. + + @end table + +-@subsection IA-64 Variable Attributes ++@node Type Attributes ++@section Specifying Attributes of Types ++@cindex attribute of types ++@cindex type attributes + +-The IA-64 back end supports the following variable attribute: ++The keyword @code{__attribute__} allows you to specify special ++attributes of types. Some type attributes apply only to @code{struct} ++and @code{union} types, while others can apply to any type defined ++via a @code{typedef} declaration. Other attributes are defined for ++functions (@pxref{Function Attributes}), labels (@pxref{Label ++Attributes}) and for variables (@pxref{Variable Attributes}). + +-@table @code +-@item model (@var{model-name}) +-@cindex @code{model} variable attribute, IA-64 ++The @code{__attribute__} keyword is followed by an attribute specification ++inside double parentheses. + +-On IA-64, use this attribute to set the addressability of an object. 
+-At present, the only supported identifier for @var{model-name} is +-@code{small}, indicating addressability via ``small'' (22-bit) +-addresses (so that their addresses can be loaded with the @code{addl} +-instruction). Caveat: such addressing is by definition not position +-independent and hence this attribute must not be used for objects +-defined by shared libraries. ++You may specify type attributes in an enum, struct or union type ++declaration or definition by placing them immediately after the ++@code{struct}, @code{union} or @code{enum} keyword. A less preferred ++syntax is to place them just past the closing curly brace of the ++definition. + +-@end table ++You can also include type attributes in a @code{typedef} declaration. ++@xref{Attribute Syntax}, for details of the exact syntax for using ++attributes. + +-@subsection M32R/D Variable Attributes ++@menu ++* Common Type Attributes:: ++* ARM Type Attributes:: ++* MeP Type Attributes:: ++* PowerPC Type Attributes:: ++* SPU Type Attributes:: ++* x86 Type Attributes:: ++@end menu + +-One attribute is currently defined for the M32R/D@. ++@node Common Type Attributes ++@subsection Common Type Attributes ++ ++The following type attributes are supported on most targets. + + @table @code +-@item model (@var{model-name}) +-@cindex @code{model-name} variable attribute, M32R/D +-@cindex variable addressability on the M32R/D +-Use this attribute on the M32R/D to set the addressability of an object. +-The identifier @var{model-name} is one of @code{small}, @code{medium}, +-or @code{large}, representing each of the code models. ++@cindex @code{aligned} type attribute ++@item aligned (@var{alignment}) ++This attribute specifies a minimum alignment (in bytes) for variables ++of the specified type. For example, the declarations: + +-Small model objects live in the lower 16MB of memory (so that their +-addresses can be loaded with the @code{ld24} instruction). ++@smallexample ++struct S @{ short f[3]; @} __attribute__ ((aligned (8))); ++typedef int more_aligned_int __attribute__ ((aligned (8))); ++@end smallexample + +-Medium and large model objects may live anywhere in the 32-bit address space +-(the compiler generates @code{seth/add3} instructions to load their +-addresses). +-@end table ++@noindent ++force the compiler to ensure (as far as it can) that each variable whose ++type is @code{struct S} or @code{more_aligned_int} is allocated and ++aligned @emph{at least} on a 8-byte boundary. On a SPARC, having all ++variables of type @code{struct S} aligned to 8-byte boundaries allows ++the compiler to use the @code{ldd} and @code{std} (doubleword load and ++store) instructions when copying one variable of type @code{struct S} to ++another, thus improving run-time efficiency. + +-@anchor{MeP Variable Attributes} +-@subsection MeP Variable Attributes ++Note that the alignment of any given @code{struct} or @code{union} type ++is required by the ISO C standard to be at least a perfect multiple of ++the lowest common multiple of the alignments of all of the members of ++the @code{struct} or @code{union} in question. This means that you @emph{can} ++effectively adjust the alignment of a @code{struct} or @code{union} ++type by attaching an @code{aligned} attribute to any one of the members ++of such a type, but the notation illustrated in the example above is a ++more obvious, intuitive, and readable way to request the compiler to ++adjust the alignment of an entire @code{struct} or @code{union} type. 
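For instance, under the rule just described, both of the following
declarations (the tag names are only illustrative) give the whole type at
least 8-byte alignment; the first spelling simply states that intent more
directly:

@smallexample
struct whole_aligned @{ short f[3]; @} __attribute__ ((aligned (8)));
struct member_aligned @{ short f[3] __attribute__ ((aligned (8))); @};
@end smallexample
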
+ +-The MeP target has a number of addressing modes and busses. The +-@code{near} space spans the standard memory space's first 16 megabytes +-(24 bits). The @code{far} space spans the entire 32-bit memory space. +-The @code{based} space is a 128-byte region in the memory space that +-is addressed relative to the @code{$tp} register. The @code{tiny} +-space is a 65536-byte region relative to the @code{$gp} register. In +-addition to these memory regions, the MeP target has a separate 16-bit +-control bus which is specified with @code{cb} attributes. ++As in the preceding example, you can explicitly specify the alignment ++(in bytes) that you wish the compiler to use for a given @code{struct} ++or @code{union} type. Alternatively, you can leave out the alignment factor ++and just ask the compiler to align a type to the maximum ++useful alignment for the target machine you are compiling for. For ++example, you could write: + +-@table @code ++@smallexample ++struct S @{ short f[3]; @} __attribute__ ((aligned)); ++@end smallexample + +-@item based +-@cindex @code{based} variable attribute, MeP +-Any variable with the @code{based} attribute is assigned to the +-@code{.based} section, and is accessed with relative to the +-@code{$tp} register. ++Whenever you leave out the alignment factor in an @code{aligned} ++attribute specification, the compiler automatically sets the alignment ++for the type to the largest alignment that is ever used for any data ++type on the target machine you are compiling for. Doing this can often ++make copy operations more efficient, because the compiler can use ++whatever instructions copy the biggest chunks of memory when performing ++copies to or from the variables that have types that you have aligned ++this way. + +-@item tiny +-@cindex @code{tiny} variable attribute, MeP +-Likewise, the @code{tiny} attribute assigned variables to the +-@code{.tiny} section, relative to the @code{$gp} register. ++In the example above, if the size of each @code{short} is 2 bytes, then ++the size of the entire @code{struct S} type is 6 bytes. The smallest ++power of two that is greater than or equal to that is 8, so the ++compiler sets the alignment for the entire @code{struct S} type to 8 ++bytes. + +-@item near +-@cindex @code{near} variable attribute, MeP +-Variables with the @code{near} attribute are assumed to have addresses +-that fit in a 24-bit addressing mode. This is the default for large +-variables (@code{-mtiny=4} is the default) but this attribute can +-override @code{-mtiny=} for small variables, or override @code{-ml}. ++Note that although you can ask the compiler to select a time-efficient ++alignment for a given type and then declare only individual stand-alone ++objects of that type, the compiler's ability to select a time-efficient ++alignment is primarily useful only when you plan to create arrays of ++variables having the relevant (efficiently aligned) type. If you ++declare or use arrays of variables of an efficiently-aligned type, then ++it is likely that your program also does pointer arithmetic (or ++subscripting, which amounts to the same thing) on pointers to the ++relevant type, and the code that the compiler generates for these ++pointer arithmetic operations is often more efficient for ++efficiently-aligned types than for other types. + +-@item far +-@cindex @code{far} variable attribute, MeP +-Variables with the @code{far} attribute are addressed using a full +-32-bit address. 
Since this covers the entire memory space, this +-allows modules to make no assumptions about where variables might be +-stored. ++The @code{aligned} attribute can only increase the alignment; but you ++can decrease it by specifying @code{packed} as well. See below. + +-@item io +-@cindex @code{io} variable attribute, MeP +-@itemx io (@var{addr}) +-Variables with the @code{io} attribute are used to address +-memory-mapped peripherals. If an address is specified, the variable +-is assigned that address, else it is not assigned an address (it is +-assumed some other module assigns an address). Example: ++Note that the effectiveness of @code{aligned} attributes may be limited ++by inherent limitations in your linker. On many systems, the linker is ++only able to arrange for variables to be aligned up to a certain maximum ++alignment. (For some linkers, the maximum supported alignment may ++be very very small.) If your linker is only able to align variables ++up to a maximum of 8-byte alignment, then specifying @code{aligned(16)} ++in an @code{__attribute__} still only provides you with 8-byte ++alignment. See your linker documentation for further information. ++ ++@opindex fshort-enums ++Specifying this attribute for @code{struct} and @code{union} types is ++equivalent to specifying the @code{packed} attribute on each of the ++structure or union members. Specifying the @option{-fshort-enums} ++flag on the line is equivalent to specifying the @code{packed} ++attribute on all @code{enum} definitions. ++ ++In the following example @code{struct my_packed_struct}'s members are ++packed closely together, but the internal layout of its @code{s} member ++is not packed---to do that, @code{struct my_unpacked_struct} needs to ++be packed too. + + @smallexample +-int timer_count __attribute__((io(0x123))); ++struct my_unpacked_struct ++ @{ ++ char c; ++ int i; ++ @}; ++ ++struct __attribute__ ((__packed__)) my_packed_struct ++ @{ ++ char c; ++ int i; ++ struct my_unpacked_struct s; ++ @}; + @end smallexample + +-@item cb +-@itemx cb (@var{addr}) +-@cindex @code{cb} variable attribute, MeP +-Variables with the @code{cb} attribute are used to access the control +-bus, using special instructions. @code{addr} indicates the control bus +-address. Example: ++You may only specify this attribute on the definition of an @code{enum}, ++@code{struct} or @code{union}, not on a @code{typedef} that does not ++also define the enumerated type, structure or union. ++ ++@item bnd_variable_size ++@cindex @code{bnd_variable_size} type attribute ++@cindex Pointer Bounds Checker attributes ++When applied to a structure field, this attribute tells Pointer ++Bounds Checker that the size of this field should not be computed ++using static type information. It may be used to mark variably-sized ++static array fields placed at the end of a structure. ++ ++@smallexample ++struct S ++@{ ++ int size; ++ char data[1]; ++@} ++S *p = (S *)malloc (sizeof(S) + 100); ++p->data[10] = 0; //Bounds violation ++@end smallexample ++ ++@noindent ++By using an attribute for the field we may avoid unwanted bound ++violation checks: ++ ++@smallexample ++struct S ++@{ ++ int size; ++ char data[1] __attribute__((bnd_variable_size)); ++@} ++S *p = (S *)malloc (sizeof(S) + 100); ++p->data[10] = 0; //OK ++@end smallexample ++ ++@item deprecated ++@itemx deprecated (@var{msg}) ++@cindex @code{deprecated} type attribute ++The @code{deprecated} attribute results in a warning if the type ++is used anywhere in the source file. 
This is useful when identifying ++types that are expected to be removed in a future version of a program. ++If possible, the warning also includes the location of the declaration ++of the deprecated type, to enable users to easily find further ++information about why the type is deprecated, or what they should do ++instead. Note that the warnings only occur for uses and then only ++if the type is being applied to an identifier that itself is not being ++declared as deprecated. - @end table + @smallexample +-int cpu_clock __attribute__((cb(0x123))); ++typedef int T1 __attribute__ ((deprecated)); ++T1 x; ++typedef T1 T2; ++T2 y; ++typedef T1 T3 __attribute__ ((deprecated)); ++T3 z __attribute__ ((deprecated)); + @end smallexample --@anchor{AVR Variable Attributes} -+@node AVR Variable Attributes - @subsection AVR Variable Attributes +-@end table +- +-@subsection PowerPC Variable Attributes ++@noindent ++results in a warning on line 2 and 3 but not lines 4, 5, or 6. No ++warning is issued for line 4 because T2 is not explicitly ++deprecated. Line 5 has no warning because T3 is explicitly ++deprecated. Similarly for line 6. The optional @var{msg} ++argument, which must be a string, is printed in the warning if ++present. + +-Three attributes currently are defined for PowerPC configurations: +-@code{altivec}, @code{ms_struct} and @code{gcc_struct}. ++The @code{deprecated} attribute can also be used for functions and ++variables (@pxref{Function Attributes}, @pxref{Variable Attributes}.) + +-@cindex @code{ms_struct} variable attribute, PowerPC +-@cindex @code{gcc_struct} variable attribute, PowerPC +-For full documentation of the struct attributes please see the +-documentation in @ref{x86 Variable Attributes}. ++@item designated_init ++@cindex @code{designated_init} type attribute ++This attribute may only be applied to structure types. It indicates ++that any initialization of an object of this type must use designated ++initializers rather than positional initializers. The intent of this ++attribute is to allow the programmer to indicate that a structure's ++layout may change, and that therefore relying on positional ++initialization will result in future breakage. - @table @code -@@ -5388,6 +5776,7 @@ volatile int porta __attribute__((address (0x600))); +-@cindex @code{altivec} variable attribute, PowerPC +-For documentation of @code{altivec} attribute please see the +-documentation in @ref{PowerPC Type Attributes}. ++GCC emits warnings based on this attribute by default; use ++@option{-Wno-designated-init} to suppress them. - @end table +-@subsection SPU Variable Attributes ++@item may_alias ++@cindex @code{may_alias} type attribute ++Accesses through pointers to types with this attribute are not subject ++to type-based alias analysis, but are instead assumed to be able to alias ++any other type of objects. ++In the context of section 6.5 paragraph 7 of the C99 standard, ++an lvalue expression ++dereferencing such a pointer is treated like having a character type. ++See @option{-fstrict-aliasing} for more information on aliasing issues. ++This extension exists to support some vector APIs, in which pointers to ++one vector type are permitted to alias pointers to a different vector type. + +-@cindex @code{spu_vector} variable attribute, SPU +-The SPU supports the @code{spu_vector} attribute for variables. For +-documentation of this attribute please see the documentation in +-@ref{SPU Type Attributes}. ++Note that an object of a type with this attribute does not have any ++special semantics. 
-+@node Blackfin Variable Attributes - @subsection Blackfin Variable Attributes +-@anchor{x86 Variable Attributes} +-@subsection x86 Variable Attributes ++Example of use: - Three attributes are currently defined for the Blackfin. -@@ -5412,6 +5801,7 @@ Variables with @code{l2} attribute are put into the specific section - named @code{.l2.data}. - @end table +-Two attributes are currently defined for x86 configurations: +-@code{ms_struct} and @code{gcc_struct}. ++@smallexample ++typedef short __attribute__((__may_alias__)) short_a; -+@node H8/300 Variable Attributes - @subsection H8/300 Variable Attributes +-@table @code +-@item ms_struct +-@itemx gcc_struct +-@cindex @code{ms_struct} variable attribute, x86 +-@cindex @code{gcc_struct} variable attribute, x86 ++int ++main (void) ++@{ ++ int a = 0x12345678; ++ short_a *b = (short_a *) &a; - These variable attributes are available for H8/300 targets: -@@ -5440,6 +5830,7 @@ slightly under 32KB of data. +-If @code{packed} is used on a structure, or if bit-fields are used, +-it may be that the Microsoft ABI lays out the structure differently +-than the way GCC normally does. Particularly when moving packed +-data between functions compiled with GCC and the native Microsoft compiler +-(either via function call or as data in a file), it may be necessary to access +-either format. ++ b[1] = 0; + +-Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 +-compilers to match the native Microsoft compiler. ++ if (a == 0x12345678) ++ abort(); + +-The Microsoft structure layout algorithm is fairly simple with the exception +-of the bit-field packing. +-The padding and alignment of members of structures and whether a bit-field +-can straddle a storage-unit boundary are determine by these rules: ++ exit(0); ++@} ++@end smallexample - @end table +-@enumerate +-@item Structure members are stored sequentially in the order in which they are +-declared: the first member has the lowest memory address and the last member +-the highest. ++@noindent ++If you replaced @code{short_a} with @code{short} in the variable ++declaration, the above program would abort when compiled with ++@option{-fstrict-aliasing}, which is on by default at @option{-O2} or ++above. + +-@item Every data object has an alignment requirement. The alignment requirement +-for all data except structures, unions, and arrays is either the size of the +-object or the current packing size (specified with either the +-@code{aligned} attribute or the @code{pack} pragma), +-whichever is less. For structures, unions, and arrays, +-the alignment requirement is the largest alignment requirement of its members. +-Every object is allocated an offset so that: ++@item packed ++@cindex @code{packed} type attribute ++This attribute, attached to @code{struct} or @code{union} type ++definition, specifies that each member (other than zero-width bit-fields) ++of the structure or union is placed to minimize the memory required. When ++attached to an @code{enum} definition, it indicates that the smallest ++integral type should be used. -+@node IA-64 Variable Attributes - @subsection IA-64 Variable Attributes +-@smallexample +-offset % alignment_requirement == 0 +-@end smallexample ++@item transparent_union ++@cindex @code{transparent_union} type attribute - The IA-64 back end supports the following variable attribute: -@@ -5458,6 +5849,7 @@ defined by shared libraries. 
+-@item Adjacent bit-fields are packed into the same 1-, 2-, or 4-byte allocation +-unit if the integral types are the same size and if the next bit-field fits +-into the current allocation unit without crossing the boundary imposed by the +-common alignment requirements of the bit-fields. +-@end enumerate ++This attribute, attached to a @code{union} type definition, indicates ++that any function parameter having that union type causes calls to that ++function to be treated in a special way. - @end table +-MSVC interprets zero-length bit-fields in the following ways: ++First, the argument corresponding to a transparent union type can be of ++any type in the union; no cast is required. Also, if the union contains ++a pointer type, the corresponding argument can be a null pointer ++constant or a void pointer expression; and if the union contains a void ++pointer type, the corresponding argument can be any pointer expression. ++If the union member type is a pointer, qualifiers like @code{const} on ++the referenced type must be respected, just as with normal pointer ++conversions. -+@node M32R/D Variable Attributes - @subsection M32R/D Variable Attributes +-@enumerate +-@item If a zero-length bit-field is inserted between two bit-fields that +-are normally coalesced, the bit-fields are not coalesced. ++Second, the argument is passed to the function using the calling ++conventions of the first member of the transparent union, not the calling ++conventions of the union itself. All members of the union must have the ++same machine representation; this is necessary for this argument passing ++to work properly. - One attribute is currently defined for the M32R/D@. -@@ -5478,7 +5870,7 @@ Medium and large model objects may live anywhere in the 32-bit address space - addresses). - @end table +-For example: ++Transparent unions are designed for library functions that have multiple ++interfaces for compatibility reasons. For example, suppose the ++@code{wait} function must accept either a value of type @code{int *} to ++comply with POSIX, or a value of type @code{union wait *} to comply with ++the 4.1BSD interface. If @code{wait}'s parameter were @code{void *}, ++@code{wait} would accept both kinds of arguments, but it would also ++accept any other pointer type and this would make argument type checking ++less useful. Instead, @code{} might define the interface ++as follows: --@anchor{MeP Variable Attributes} -+@node MeP Variable Attributes - @subsection MeP Variable Attributes + @smallexample +-struct +- @{ +- unsigned long bf_1 : 12; +- unsigned long : 0; +- unsigned long bf_2 : 12; +- @} t1; ++typedef union __attribute__ ((__transparent_union__)) ++ @{ ++ int *__ip; ++ union wait *__up; ++ @} wait_status_ptr_t; ++ ++pid_t wait (wait_status_ptr_t); + @end smallexample - The MeP target has a number of addressing modes and busses. The -@@ -5536,12 +5928,78 @@ Variables with the @code{cb} attribute are used to access the control - bus, using special instructions. @code{addr} indicates the control bus - address. Example: + @noindent +-The size of @code{t1} is 8 bytes with the zero-length bit-field. If the +-zero-length bit-field were removed, @code{t1}'s size would be 4 bytes. ++This interface allows either @code{int *} or @code{union wait *} ++arguments to be passed, using the @code{int *} calling convention. 
++The program can call @code{wait} with arguments of either type: --@smallexample --int cpu_clock __attribute__((cb(0x123))); --@end smallexample +-@item If a zero-length bit-field is inserted after a bit-field, @code{foo}, and the +-alignment of the zero-length bit-field is greater than the member that follows it, +-@code{bar}, @code{bar} is aligned as the type of the zero-length bit-field. +@smallexample -+int cpu_clock __attribute__((cb(0x123))); ++int w1 () @{ int w; return wait (&w); @} ++int w2 () @{ union wait w; return wait (&w); @} +@end smallexample -+ + +-For example: ++@noindent ++With this interface, @code{wait}'s implementation might look like this: + + @smallexample +-struct +- @{ +- char foo : 4; +- short : 0; +- char bar; +- @} t2; +- +-struct +- @{ +- char foo : 4; +- short : 0; +- double bar; +- @} t3; ++pid_t wait (wait_status_ptr_t p) ++@{ ++ return waitpid (-1, p.__ip, 0); ++@} + @end smallexample + +-@noindent +-For @code{t2}, @code{bar} is placed at offset 2, rather than offset 1. +-Accordingly, the size of @code{t2} is 4. For @code{t3}, the zero-length +-bit-field does not affect the alignment of @code{bar} or, as a result, the size +-of the structure. ++@item unused ++@cindex @code{unused} type attribute ++When attached to a type (including a @code{union} or a @code{struct}), ++this attribute means that variables of that type are meant to appear ++possibly unused. GCC does not produce a warning for any variables of ++that type, even if the variable appears to do nothing. This is often ++the case with lock or thread classes, which are usually defined and then ++not referenced, but contain constructors and destructors that have ++nontrivial bookkeeping functions. + +-Taking this into account, it is important to note the following: ++@item visibility ++@cindex @code{visibility} type attribute ++In C++, attribute visibility (@pxref{Function Attributes}) can also be ++applied to class, struct, union and enum types. Unlike other type ++attributes, the attribute must appear between the initial keyword and ++the name of the type; it cannot appear after the body of the type. + +-@enumerate +-@item If a zero-length bit-field follows a normal bit-field, the type of the +-zero-length bit-field may affect the alignment of the structure as whole. For +-example, @code{t2} has a size of 4 bytes, since the zero-length bit-field follows a +-normal bit-field, and is of type short. ++Note that the type visibility is applied to vague linkage entities ++associated with the class (vtable, typeinfo node, etc.). In ++particular, if a class is thrown as an exception in one shared object ++and caught in another, the class must have default visibility. ++Otherwise the two shared objects are unable to use the same ++typeinfo node and exception handling will break. + +-@item Even if a zero-length bit-field is not followed by a normal bit-field, it may +-still affect the alignment of the structure: +@end table + -+@node Microsoft Windows Variable Attributes -+@subsection Microsoft Windows Variable Attributes -+ -+You can use these attributes on Microsoft Windows targets. -+@ref{x86 Variable Attributes} for additional Windows compatibility -+attributes available on all x86 targets. -+ -+@table @code -+@item dllimport -+@itemx dllexport -+@cindex @code{dllimport} variable attribute -+@cindex @code{dllexport} variable attribute -+The @code{dllimport} and @code{dllexport} attributes are described in -+@ref{Microsoft Windows Function Attributes}. 
++To specify multiple attributes, separate them by commas within the ++double parentheses: for example, @samp{__attribute__ ((aligned (16), ++packed))}. + -+@item selectany -+@cindex @code{selectany} variable attribute -+The @code{selectany} attribute causes an initialized global variable to -+have link-once semantics. When multiple definitions of the variable are -+encountered by the linker, the first is selected and the remainder are -+discarded. Following usage by the Microsoft compiler, the linker is told -+@emph{not} to warn about size or content differences of the multiple -+definitions. -+ -+Although the primary usage of this attribute is for POD types, the -+attribute can also be applied to global C++ objects that are initialized -+by a constructor. In this case, the static initialization and destruction -+code for the object is emitted in each translation defining the object, -+but the calls to the constructor and destructor are protected by a -+link-once guard variable. -+ -+The @code{selectany} attribute is only available on Microsoft Windows -+targets. You can use @code{__declspec (selectany)} as a synonym for -+@code{__attribute__ ((selectany))} for compatibility with other -+compilers. ++@node ARM Type Attributes ++@subsection ARM Type Attributes + -+@item shared -+@cindex @code{shared} variable attribute -+On Microsoft Windows, in addition to putting variable definitions in a named -+section, the section can also be shared among all running copies of an -+executable or DLL@. For example, this small program defines shared data -+by putting it in a named section @code{shared} and marking the section -+shareable: ++@cindex @code{notshared} type attribute, ARM ++On those ARM targets that support @code{dllimport} (such as Symbian ++OS), you can use the @code{notshared} attribute to indicate that the ++virtual table and other similar data for a class should not be ++exported from a DLL@. For example: + +@smallexample -+int foo __attribute__((section ("shared"), shared)) = 0; -+ -+int -+main() -+@{ -+ /* @r{Read and write foo. All running -+ copies see the same value.} */ -+ return 0; ++class __declspec(notshared) C @{ ++public: ++ __declspec(dllimport) C(); ++ virtual void f(); +@} -+@end smallexample -+ -+@noindent -+You may only use the @code{shared} attribute along with @code{section} -+attribute with a fully-initialized global definition because of the way -+linkers work. See @code{section} attribute for more information. -+ -+The @code{shared} attribute is only available on Microsoft Windows@. - @end table +-@smallexample +-struct +- @{ +- char foo : 6; +- long : 0; +- @} t4; ++__declspec(dllexport) ++C::C() @{@} + @end smallexample -+@node PowerPC Variable Attributes - @subsection PowerPC Variable Attributes + @noindent +-Here, @code{t4} takes up 4 bytes. +-@end enumerate ++In this code, @code{C::C} is exported from the current DLL, but the ++virtual table for @code{C} is not exported. (You can use ++@code{__attribute__} instead of @code{__declspec} if you prefer, but ++most Symbian OS code uses @code{__declspec}.) - Three attributes currently are defined for PowerPC configurations: -@@ -5556,6 +6014,7 @@ documentation in @ref{x86 Variable Attributes}. - For documentation of @code{altivec} attribute please see the - documentation in @ref{PowerPC Type Attributes}. 
+-@item Zero-length bit-fields following non-bit-field members are ignored: ++@node MeP Type Attributes ++@subsection MeP Type Attributes -+@node SPU Variable Attributes - @subsection SPU Variable Attributes +-@smallexample +-struct +- @{ +- char foo; +- long : 0; +- char bar; +- @} t5; +-@end smallexample ++@cindex @code{based} type attribute, MeP ++@cindex @code{tiny} type attribute, MeP ++@cindex @code{near} type attribute, MeP ++@cindex @code{far} type attribute, MeP ++Many of the MeP variable attributes may be applied to types as well. ++Specifically, the @code{based}, @code{tiny}, @code{near}, and ++@code{far} attributes may be applied to either. The @code{io} and ++@code{cb} attributes may not be applied to types. - @cindex @code{spu_vector} variable attribute, SPU -@@ -5563,7 +6022,7 @@ The SPU supports the @code{spu_vector} attribute for variables. For - documentation of this attribute please see the documentation in - @ref{SPU Type Attributes}. +-@noindent +-Here, @code{t5} takes up 2 bytes. +-@end enumerate +-@end table ++@node PowerPC Type Attributes ++@subsection PowerPC Type Attributes --@anchor{x86 Variable Attributes} -+@node x86 Variable Attributes - @subsection x86 Variable Attributes +-@subsection Xstormy16 Variable Attributes ++Three attributes currently are defined for PowerPC configurations: ++@code{altivec}, @code{ms_struct} and @code{gcc_struct}. + +-One attribute is currently defined for xstormy16 configurations: +-@code{below100}. ++@cindex @code{ms_struct} type attribute, PowerPC ++@cindex @code{gcc_struct} type attribute, PowerPC ++For full documentation of the @code{ms_struct} and @code{gcc_struct} ++attributes please see the documentation in @ref{x86 Type Attributes}. - Two attributes are currently defined for x86 configurations: -@@ -5701,6 +6160,7 @@ Here, @code{t5} takes up 2 bytes. - @end enumerate - @end table +-@table @code +-@item below100 +-@cindex @code{below100} variable attribute, Xstormy16 ++@cindex @code{altivec} type attribute, PowerPC ++The @code{altivec} attribute allows one to declare AltiVec vector data ++types supported by the AltiVec Programming Interface Manual. The ++attribute requires an argument to specify one of three vector types: ++@code{vector__}, @code{pixel__} (always followed by unsigned short), ++and @code{bool__} (always followed by unsigned). + +-If a variable has the @code{below100} attribute (@code{BELOW100} is +-allowed also), GCC places the variable in the first 0x100 bytes of +-memory and use special opcodes to access it. Such variables are +-placed in either the @code{.bss_below100} section or the +-@code{.data_below100} section. ++@smallexample ++__attribute__((altivec(vector__))) ++__attribute__((altivec(pixel__))) unsigned short ++__attribute__((altivec(bool__))) unsigned ++@end smallexample -+@node Xstormy16 Variable Attributes - @subsection Xstormy16 Variable Attributes +-@end table ++These attributes mainly are intended to support the @code{__vector}, ++@code{__pixel}, and @code{__bool} AltiVec keywords. 
- One attribute is currently defined for xstormy16 configurations: -@@ -5724,33 +6184,39 @@ placed in either the @code{.bss_below100} section or the - @cindex type attributes +-@node Type Attributes +-@section Specifying Attributes of Types +-@cindex attribute of types +-@cindex type attributes ++@node SPU Type Attributes ++@subsection SPU Type Attributes - The keyword @code{__attribute__} allows you to specify special +-The keyword @code{__attribute__} allows you to specify special -attributes of @code{struct} and @code{union} types when you define -such types. This keyword is followed by an attribute specification -inside double parentheses. Eight attributes are currently defined for -types: @code{aligned}, @code{packed}, @code{transparent_union}, -@code{unused}, @code{deprecated}, @code{visibility}, @code{may_alias} -and @code{bnd_variable_size}. Other attributes are defined for -+attributes of types. Some type attributes apply only to @code{struct} -+and @code{union} types, while others can apply to any type defined -+via a @code{typedef} declaration. Other attributes are defined for - functions (@pxref{Function Attributes}), labels (@pxref{Label - Attributes}) and for variables (@pxref{Variable Attributes}). +-functions (@pxref{Function Attributes}), labels (@pxref{Label +-Attributes}) and for variables (@pxref{Variable Attributes}). ++@cindex @code{spu_vector} type attribute, SPU ++The SPU supports the @code{spu_vector} attribute for types. This attribute ++allows one to declare vector data types supported by the Sony/Toshiba/IBM SPU ++Language Extensions Specification. It is intended to support the ++@code{__vector} keyword. -You may also specify any one of these attributes with @samp{__} -preceding and following its keyword. This allows you to use these -attributes in header files without being concerned about a possible -macro of the same name. For example, you may use @code{__aligned__} -instead of @code{aligned}. -+The @code{__attribute__} keyword is followed by an attribute specification -+inside double parentheses. ++@node x86 Type Attributes ++@subsection x86 Type Attributes - You may specify type attributes in an enum, struct or union type +-You may specify type attributes in an enum, struct or union type -declaration or definition, or for other types in a @code{typedef} -declaration. -- ++Two attributes are currently defined for x86 configurations: ++@code{ms_struct} and @code{gcc_struct}. + -For an enum, struct or union type, you may specify attributes either -between the enum, struct or union tag and the name of the type, or -just past the closing curly brace of the @emph{definition}. The -former syntax is preferred. -+declaration or definition by placing them immediately after the -+@code{struct}, @code{union} or @code{enum} keyword. A less preferred -+syntax is to place them just past the closing curly brace of the -+definition. - -+You can also include type attributes in a @code{typedef} declaration. - @xref{Attribute Syntax}, for details of the exact syntax for using - attributes. ++@table @code -+@menu -+* Common Type Attributes:: -+* ARM Type Attributes:: -+* MeP Type Attributes:: -+* PowerPC Type Attributes:: -+* SPU Type Attributes:: -+* x86 Type Attributes:: -+@end menu -+ -+@node Common Type Attributes -+@subsection Common Type Attributes -+ -+The following type attributes are supported on most targets. 
-+ - @table @code - @cindex @code{aligned} type attribute - @item aligned (@var{alignment}) -@@ -5831,14 +6297,6 @@ up to a maximum of 8-byte alignment, then specifying @code{aligned(16)} - in an @code{__attribute__} still only provides you with 8-byte - alignment. See your linker documentation for further information. +-@xref{Attribute Syntax}, for details of the exact syntax for using +-attributes. ++@item ms_struct ++@itemx gcc_struct ++@cindex @code{ms_struct} type attribute, x86 ++@cindex @code{gcc_struct} type attribute, x86 + +-@table @code +-@cindex @code{aligned} type attribute +-@item aligned (@var{alignment}) +-This attribute specifies a minimum alignment (in bytes) for variables +-of the specified type. For example, the declarations: ++If @code{packed} is used on a structure, or if bit-fields are used ++it may be that the Microsoft ABI packs them differently ++than GCC normally packs them. Particularly when moving packed ++data between functions compiled with GCC and the native Microsoft compiler ++(either via function call or as data in a file), it may be necessary to access ++either format. + +-@smallexample +-struct S @{ short f[3]; @} __attribute__ ((aligned (8))); +-typedef int more_aligned_int __attribute__ ((aligned (8))); +-@end smallexample ++Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 ++compilers to match the native Microsoft compiler. ++@end table + +-@noindent +-force the compiler to ensure (as far as it can) that each variable whose +-type is @code{struct S} or @code{more_aligned_int} is allocated and +-aligned @emph{at least} on a 8-byte boundary. On a SPARC, having all +-variables of type @code{struct S} aligned to 8-byte boundaries allows +-the compiler to use the @code{ldd} and @code{std} (doubleword load and +-store) instructions when copying one variable of type @code{struct S} to +-another, thus improving run-time efficiency. ++@node Label Attributes ++@section Label Attributes ++@cindex Label Attributes + +-Note that the alignment of any given @code{struct} or @code{union} type +-is required by the ISO C standard to be at least a perfect multiple of +-the lowest common multiple of the alignments of all of the members of +-the @code{struct} or @code{union} in question. This means that you @emph{can} +-effectively adjust the alignment of a @code{struct} or @code{union} +-type by attaching an @code{aligned} attribute to any one of the members +-of such a type, but the notation illustrated in the example above is a +-more obvious, intuitive, and readable way to request the compiler to +-adjust the alignment of an entire @code{struct} or @code{union} type. ++GCC allows attributes to be set on C labels. @xref{Attribute Syntax}, for ++details of the exact syntax for using attributes. Other attributes are ++available for functions (@pxref{Function Attributes}), variables ++(@pxref{Variable Attributes}) and for types (@pxref{Type Attributes}). + +-As in the preceding example, you can explicitly specify the alignment +-(in bytes) that you wish the compiler to use for a given @code{struct} +-or @code{union} type. Alternatively, you can leave out the alignment factor +-and just ask the compiler to align a type to the maximum +-useful alignment for the target machine you are compiling for. 
For +-example, you could write: ++This example uses the @code{cold} label attribute to indicate the ++@code{ErrorHandling} branch is unlikely to be taken and that the ++@code{ErrorHandling} label is unused: + + @smallexample +-struct S @{ short f[3]; @} __attribute__ ((aligned)); +-@end smallexample + +-Whenever you leave out the alignment factor in an @code{aligned} +-attribute specification, the compiler automatically sets the alignment +-for the type to the largest alignment that is ever used for any data +-type on the target machine you are compiling for. Doing this can often +-make copy operations more efficient, because the compiler can use +-whatever instructions copy the biggest chunks of memory when performing +-copies to or from the variables that have types that you have aligned +-this way. ++ asm goto ("some asm" : : : : NoError); + +-In the example above, if the size of each @code{short} is 2 bytes, then +-the size of the entire @code{struct S} type is 6 bytes. The smallest +-power of two that is greater than or equal to that is 8, so the +-compiler sets the alignment for the entire @code{struct S} type to 8 +-bytes. ++/* This branch (the fall-through from the asm) is less commonly used */ ++ErrorHandling: ++ __attribute__((cold, unused)); /* Semi-colon is required here */ ++ printf("error\n"); ++ return 0; + +-Note that although you can ask the compiler to select a time-efficient +-alignment for a given type and then declare only individual stand-alone +-objects of that type, the compiler's ability to select a time-efficient +-alignment is primarily useful only when you plan to create arrays of +-variables having the relevant (efficiently aligned) type. If you +-declare or use arrays of variables of an efficiently-aligned type, then +-it is likely that your program also does pointer arithmetic (or +-subscripting, which amounts to the same thing) on pointers to the +-relevant type, and the code that the compiler generates for these +-pointer arithmetic operations is often more efficient for +-efficiently-aligned types than for other types. ++NoError: ++ printf("no error\n"); ++ return 1; ++@end smallexample + +-The @code{aligned} attribute can only increase the alignment; but you +-can decrease it by specifying @code{packed} as well. See below. ++@table @code ++@item unused ++@cindex @code{unused} label attribute ++This feature is intended for program-generated code that may contain ++unused labels, but which is compiled with @option{-Wall}. It is ++not normally appropriate to use in it human-written code, though it ++could be useful in cases where the code that jumps to the label is ++contained within an @code{#ifdef} conditional. + +-Note that the effectiveness of @code{aligned} attributes may be limited +-by inherent limitations in your linker. On many systems, the linker is +-only able to arrange for variables to be aligned up to a certain maximum +-alignment. (For some linkers, the maximum supported alignment may +-be very very small.) If your linker is only able to align variables +-up to a maximum of 8-byte alignment, then specifying @code{aligned(16)} +-in an @code{__attribute__} still only provides you with 8-byte +-alignment. See your linker documentation for further information. ++@item hot ++@cindex @code{hot} label attribute ++The @code{hot} attribute on a label is used to inform the compiler that ++the path following the label is more likely than paths that are not so ++annotated. 
This attribute is used in cases where @code{__builtin_expect} ++cannot be used, for instance with computed goto or @code{asm goto}. -@item packed -@cindex @code{packed} type attribute @@ -4440,21 +5942,86 @@ -of the structure or union is placed to minimize the memory required. When -attached to an @code{enum} definition, it indicates that the smallest -integral type should be used. -- - @opindex fshort-enums - Specifying this attribute for @code{struct} and @code{union} types is - equivalent to specifying the @code{packed} attribute on each of the -@@ -5870,78 +6328,38 @@ You may only specify this attribute on the definition of an @code{enum}, - @code{struct} or @code{union}, not on a @code{typedef} that does not - also define the enumerated type, structure or union. ++@item cold ++@cindex @code{cold} label attribute ++The @code{cold} attribute on labels is used to inform the compiler that ++the path following the label is unlikely to be executed. This attribute ++is used in cases where @code{__builtin_expect} cannot be used, for instance ++with computed goto or @code{asm goto}. + +-@opindex fshort-enums +-Specifying this attribute for @code{struct} and @code{union} types is +-equivalent to specifying the @code{packed} attribute on each of the +-structure or union members. Specifying the @option{-fshort-enums} +-flag on the line is equivalent to specifying the @code{packed} +-attribute on all @code{enum} definitions. ++@end table + +-In the following example @code{struct my_packed_struct}'s members are +-packed closely together, but the internal layout of its @code{s} member +-is not packed---to do that, @code{struct my_unpacked_struct} needs to +-be packed too. ++@node Attribute Syntax ++@section Attribute Syntax ++@cindex attribute syntax + +-@smallexample +-struct my_unpacked_struct +- @{ +- char c; +- int i; +- @}; ++This section describes the syntax with which @code{__attribute__} may be ++used, and the constructs to which attribute specifiers bind, for the C ++language. Some details may vary for C++ and Objective-C@. Because of ++infelicities in the grammar for attributes, some forms described here ++may not be successfully parsed in all cases. + +-struct __attribute__ ((__packed__)) my_packed_struct +- @{ +- char c; +- int i; +- struct my_unpacked_struct s; +- @}; +-@end smallexample ++There are some problems with the semantics of attributes in C++. For ++example, there are no manglings for attributes, although they may affect ++code generation, so problems may arise when attributed types are used in ++conjunction with templates or overloading. Similarly, @code{typeid} ++does not distinguish between types with different attributes. Support ++for attributes in C++ may be restricted in future to attributes on ++declarations only, but not on nested declarators. ++ ++@xref{Function Attributes}, for details of the semantics of attributes ++applying to functions. @xref{Variable Attributes}, for details of the ++semantics of attributes applying to variables. @xref{Type Attributes}, ++for details of the semantics of attributes applying to structure, union ++and enumerated types. ++@xref{Label Attributes}, for details of the semantics of attributes ++applying to labels. + +-You may only specify this attribute on the definition of an @code{enum}, +-@code{struct} or @code{union}, not on a @code{typedef} that does not +-also define the enumerated type, structure or union. ++An @dfn{attribute specifier} is of the form ++@code{__attribute__ ((@var{attribute-list}))}. 
An @dfn{attribute list} ++is a possibly empty comma-separated sequence of @dfn{attributes}, where ++each attribute is one of the following: -@item transparent_union -@cindex @code{transparent_union} type attribute -- ++@itemize @bullet ++@item ++Empty. Empty attributes are ignored. + -This attribute, attached to a @code{union} type definition, indicates -that any function parameter having that union type causes calls to that -function to be treated in a special way. -- ++@item ++An attribute name ++(which may be an identifier such as @code{unused}, or a reserved ++word such as @code{const}). + -First, the argument corresponding to a transparent union type can be of -any type in the union; no cast is required. Also, if the union contains -a pointer type, the corresponding argument can be a null pointer @@ -4463,13 +6030,20 @@ -If the union member type is a pointer, qualifiers like @code{const} on -the referenced type must be respected, just as with normal pointer -conversions. -- ++@item ++An attribute name followed by a parenthesized list of ++parameters for the attribute. ++These parameters take one of the following forms: + -Second, the argument is passed to the function using the calling -conventions of the first member of the transparent union, not the calling -conventions of the union itself. All members of the union must have the -same machine representation; this is necessary for this argument passing -to work properly. -- ++@itemize @bullet ++@item ++An identifier. For example, @code{mode} attributes use this form. + -Transparent unions are designed for library functions that have multiple -interfaces for compatibility reasons. For example, suppose the -@code{wait} function must accept either a value of type @code{int *} to @@ -4479,57 +6053,62 @@ -accept any other pointer type and this would make argument type checking -less useful. Instead, @code{} might define the interface -as follows: -- ++@item ++An identifier followed by a comma and a non-empty comma-separated list ++of expressions. For example, @code{format} attributes use this form. + -@smallexample -typedef union __attribute__ ((__transparent_union__)) - @{ - int *__ip; - union wait *__up; - @} wait_status_ptr_t; -- ++@item ++A possibly empty comma-separated list of expressions. For example, ++@code{format_arg} attributes use this form with the list being a single ++integer constant expression, and @code{alias} attributes use this form ++with the list being a single string constant. ++@end itemize ++@end itemize + -pid_t wait (wait_status_ptr_t); -@end smallexample -- ++An @dfn{attribute specifier list} is a sequence of one or more attribute ++specifiers, not separated by any other tokens. + -@noindent -This interface allows either @code{int *} or @code{union wait *} -arguments to be passed, using the @code{int *} calling convention. -The program can call @code{wait} with arguments of either type: -+@item bnd_variable_size -+@cindex @code{bnd_variable_size} type attribute -+@cindex Pointer Bounds Checker attributes -+When applied to a structure field, this attribute tells Pointer -+Bounds Checker that the size of this field should not be computed -+using static type information. It may be used to mark variably-sized -+static array fields placed at the end of a structure. ++You may optionally specify attribute names with @samp{__} ++preceding and following the name. ++This allows you to use them in header files without ++being concerned about a possible macro of the same name. 
For example, ++you may use the attribute name @code{__noreturn__} instead of @code{noreturn}. - @smallexample +-@smallexample -int w1 () @{ int w; return wait (&w); @} -int w2 () @{ union wait w; return wait (&w); @} -+struct S -+@{ -+ int size; -+ char data[1]; -+@} -+S *p = (S *)malloc (sizeof(S) + 100); -+p->data[10] = 0; //Bounds violation - @end smallexample +-@end smallexample - @noindent +-@noindent -With this interface, @code{wait}'s implementation might look like this: -+By using an attribute for the field we may avoid unwanted bound -+violation checks: ++@subsubheading Label Attributes - @smallexample +-@smallexample -pid_t wait (wait_status_ptr_t p) -+struct S - @{ +-@{ - return waitpid (-1, p.__ip, 0); -+ int size; -+ char data[1] __attribute__((bnd_variable_size)); - @} -+S *p = (S *)malloc (sizeof(S) + 100); -+p->data[10] = 0; //OK - @end smallexample +-@} +-@end smallexample ++In GNU C, an attribute specifier list may appear after the colon following a ++label, other than a @code{case} or @code{default} label. GNU C++ only permits ++attributes on labels if the attribute specifier is immediately ++followed by a semicolon (i.e., the label applies to an empty ++statement). If the semicolon is missing, C++ label attributes are ++ambiguous, as it is permissible for a declaration, which could begin ++with an attribute list, to be labelled in C++. Declarations cannot be ++labelled in C90 or C99, so the ambiguity does not arise there. -@item unused -@cindex @code{unused} type attribute @@ -4540,32 +6119,157 @@ -the case with lock or thread classes, which are usually defined and then -not referenced, but contain constructors and destructors that have -nontrivial bookkeeping functions. ++@subsubheading Type Attributes + +-@item deprecated +-@itemx deprecated (@var{msg}) +-@cindex @code{deprecated} type attribute +-The @code{deprecated} attribute results in a warning if the type +-is used anywhere in the source file. This is useful when identifying +-types that are expected to be removed in a future version of a program. +-If possible, the warning also includes the location of the declaration +-of the deprecated type, to enable users to easily find further +-information about why the type is deprecated, or what they should do +-instead. Note that the warnings only occur for uses and then only +-if the type is being applied to an identifier that itself is not being +-declared as deprecated. ++An attribute specifier list may appear as part of a @code{struct}, ++@code{union} or @code{enum} specifier. It may go either immediately ++after the @code{struct}, @code{union} or @code{enum} keyword, or after ++the closing brace. The former syntax is preferred. ++Where attribute specifiers follow the closing brace, they are considered ++to relate to the structure, union or enumerated type defined, not to any ++enclosing declaration the type specifier appears in, and the type ++defined is not complete until after the attribute specifiers. ++@c Otherwise, there would be the following problems: a shift/reduce ++@c conflict between attributes binding the struct/union/enum and ++@c binding to the list of specifiers/qualifiers; and "aligned" ++@c attributes could use sizeof for the structure, but the size could be ++@c changed later by "packed" attributes. 
+ +-@smallexample +-typedef int T1 __attribute__ ((deprecated)); +-T1 x; +-typedef T1 T2; +-T2 y; +-typedef T1 T3 __attribute__ ((deprecated)); +-T3 z __attribute__ ((deprecated)); +-@end smallexample + +-@noindent +-results in a warning on line 2 and 3 but not lines 4, 5, or 6. No +-warning is issued for line 4 because T2 is not explicitly +-deprecated. Line 5 has no warning because T3 is explicitly +-deprecated. Similarly for line 6. The optional @var{msg} +-argument, which must be a string, is printed in the warning if +-present. ++@subsubheading All other attributes + +-The @code{deprecated} attribute can also be used for functions and +-variables (@pxref{Function Attributes}, @pxref{Variable Attributes}.) ++Otherwise, an attribute specifier appears as part of a declaration, ++counting declarations of unnamed parameters and type names, and relates ++to that declaration (which may be nested in another declaration, for ++example in the case of a parameter declaration), or to a particular declarator ++within a declaration. Where an ++attribute specifier is applied to a parameter declared as a function or ++an array, it should apply to the function or array rather than the ++pointer to which the parameter is implicitly converted, but this is not ++yet correctly implemented. + +-@item may_alias +-@cindex @code{may_alias} type attribute +-Accesses through pointers to types with this attribute are not subject +-to type-based alias analysis, but are instead assumed to be able to alias +-any other type of objects. +-In the context of section 6.5 paragraph 7 of the C99 standard, +-an lvalue expression +-dereferencing such a pointer is treated like having a character type. +-See @option{-fstrict-aliasing} for more information on aliasing issues. +-This extension exists to support some vector APIs, in which pointers to +-one vector type are permitted to alias pointers to a different vector type. ++Any list of specifiers and qualifiers at the start of a declaration may ++contain attribute specifiers, whether or not such a list may in that ++context contain storage class specifiers. (Some attributes, however, ++are essentially in the nature of storage class specifiers, and only make ++sense where storage class specifiers may be used; for example, ++@code{section}.) There is one necessary limitation to this syntax: the ++first old-style parameter declaration in a function definition cannot ++begin with an attribute specifier, because such an attribute applies to ++the function instead by syntax described below (which, however, is not ++yet implemented in this case). In some other cases, attribute ++specifiers are permitted by this grammar but not yet supported by the ++compiler. All attribute specifiers in this place relate to the ++declaration as a whole. In the obsolescent usage where a type of ++@code{int} is implied by the absence of type specifiers, such a list of ++specifiers and qualifiers may be an attribute specifier list with no ++other specifiers or qualifiers. + +-Note that an object of a type with this attribute does not have any +-special semantics. ++At present, the first parameter in a function prototype must have some ++type specifier that is not an attribute specifier; this resolves an ++ambiguity in the interpretation of @code{void f(int ++(__attribute__((foo)) x))}, but is subject to change. 
At present, if ++the parentheses of a function declarator contain only attributes then ++those attributes are ignored, rather than yielding an error or warning ++or implying a single parameter of type int, but this is subject to ++change. + +-Example of use: ++An attribute specifier list may appear immediately before a declarator ++(other than the first) in a comma-separated list of declarators in a ++declaration of more than one identifier using a single list of ++specifiers and qualifiers. Such attribute specifiers apply ++only to the identifier before whose declarator they appear. For ++example, in + + @smallexample +-typedef short __attribute__((__may_alias__)) short_a; - - @item deprecated - @itemx deprecated (@var{msg}) - @cindex @code{deprecated} type attribute -@@ -5975,6 +6393,18 @@ present. - The @code{deprecated} attribute can also be used for functions and - variables (@pxref{Function Attributes}, @pxref{Variable Attributes}.) +-int +-main (void) +-@{ +- int a = 0x12345678; +- short_a *b = (short_a *) &a; ++__attribute__((noreturn)) void d0 (void), ++ __attribute__((format(printf, 1, 2))) d1 (const char *, ...), ++ d2 (void); ++@end smallexample -+@item designated_init -+@cindex @code{designated_init} type attribute -+This attribute may only be applied to structure types. It indicates -+that any initialization of an object of this type must use designated -+initializers rather than positional initializers. The intent of this -+attribute is to allow the programmer to indicate that a structure's -+layout may change, and that therefore relying on positional -+initialization will result in future breakage. -+ -+GCC emits warnings based on this attribute by default; use -+@option{-Wno-designated-init} to suppress them. -+ - @item may_alias - @cindex @code{may_alias} type attribute - Accesses through pointers to types with this attribute are not subject -@@ -6016,70 +6446,107 @@ declaration, the above program would abort when compiled with - @option{-fstrict-aliasing}, which is on by default at @option{-O2} or - above. +- b[1] = 0; ++@noindent ++the @code{noreturn} attribute applies to all the functions ++declared; the @code{format} attribute only applies to @code{d1}. + +- if (a == 0x12345678) +- abort(); ++An attribute specifier list may appear immediately before the comma, ++@code{=} or semicolon terminating the declaration of an identifier other ++than a function definition. Such attribute specifiers apply ++to the declared object or function. Where an ++assembler name for an object or function is specified (@pxref{Asm ++Labels}), the attribute must follow the @code{asm} ++specification. + +- exit(0); +-@} +-@end smallexample ++An attribute specifier list may, in future, be permitted to appear after ++the declarator in a function definition (before any old-style parameter ++declarations or the function body). + +-@noindent +-If you replaced @code{short_a} with @code{short} in the variable +-declaration, the above program would abort when compiled with +-@option{-fstrict-aliasing}, which is on by default at @option{-O2} or +-above. ++Attribute specifiers may be mixed with type qualifiers appearing inside ++the @code{[]} of a parameter array declarator, in the C99 construct by ++which such qualifiers are applied to the pointer to which the array is ++implicitly converted. Such attribute specifiers apply to the pointer, ++not to the array, but at present this is not implemented and they are ++ignored. 
-@item visibility -@cindex @code{visibility} type attribute @@ -4573,13 +6277,15 @@ -applied to class, struct, union and enum types. Unlike other type -attributes, the attribute must appear between the initial keyword and -the name of the type; it cannot appear after the body of the type. -+@item packed -+@cindex @code{packed} type attribute -+This attribute, attached to @code{struct} or @code{union} type -+definition, specifies that each member (other than zero-width bit-fields) -+of the structure or union is placed to minimize the memory required. When -+attached to an @code{enum} definition, it indicates that the smallest -+integral type should be used. ++An attribute specifier list may appear at the start of a nested ++declarator. At present, there are some limitations in this usage: the ++attributes correctly apply to the declarator, but for most individual ++attributes the semantics this implies are not implemented. ++When attribute specifiers follow the @code{*} of a pointer ++declarator, they may be mixed with any type qualifiers present. ++The following describes the formal semantics of this syntax. It makes the ++most sense if you are familiar with the formal specification of ++declarators in the ISO C standard. -Note that the type visibility is applied to vague linkage entities -associated with the class (vtable, typeinfo node, etc.). In @@ -4587,8 +6293,12 @@ -and caught in another, the class must have default visibility. -Otherwise the two shared objects are unable to use the same -typeinfo node and exception handling will break. -+@item transparent_union -+@cindex @code{transparent_union} type attribute ++Consider (as in C99 subclause 6.7.5 paragraph 4) a declaration @code{T ++D1}, where @code{T} contains declaration specifiers that specify a type ++@var{Type} (such as @code{int}) and @code{D1} is a declarator that ++contains an identifier @var{ident}. The type specified for @var{ident} ++for derived declarators whose type does not include an attribute ++specifier is as in the ISO C standard. -@item designated_init -@cindex @code{designated_init} type attribute @@ -4598,20 +6308,21 @@ -attribute is to allow the programmer to indicate that a structure's -layout may change, and that therefore relying on positional -initialization will result in future breakage. -+This attribute, attached to a @code{union} type definition, indicates -+that any function parameter having that union type causes calls to that -+function to be treated in a special way. ++If @code{D1} has the form @code{( @var{attribute-specifier-list} D )}, ++and the declaration @code{T D} specifies the type ++``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then ++@code{T D1} specifies the type ``@var{derived-declarator-type-list} ++@var{attribute-specifier-list} @var{Type}'' for @var{ident}. -GCC emits warnings based on this attribute by default; use -@option{-Wno-designated-init} to suppress them. -+First, the argument corresponding to a transparent union type can be of -+any type in the union; no cast is required. Also, if the union contains -+a pointer type, the corresponding argument can be a null pointer -+constant or a void pointer expression; and if the union contains a void -+pointer type, the corresponding argument can be any pointer expression. -+If the union member type is a pointer, qualifiers like @code{const} on -+the referenced type must be respected, just as with normal pointer -+conversions. 
++If @code{D1} has the form @code{* ++@var{type-qualifier-and-attribute-specifier-list} D}, and the ++declaration @code{T D} specifies the type ++``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then ++@code{T D1} specifies the type ``@var{derived-declarator-type-list} ++@var{type-qualifier-and-attribute-specifier-list} pointer to @var{Type}'' for ++@var{ident}. -@item bnd_variable_size -@cindex @code{bnd_variable_size} type attribute @@ -4620,21 +6331,7 @@ -Bounds Checker that the size of this field should not be computed -using static type information. It may be used to mark variably-sized -static array fields placed at the end of a structure. -+Second, the argument is passed to the function using the calling -+conventions of the first member of the transparent union, not the calling -+conventions of the union itself. All members of the union must have the -+same machine representation; this is necessary for this argument passing -+to work properly. -+ -+Transparent unions are designed for library functions that have multiple -+interfaces for compatibility reasons. For example, suppose the -+@code{wait} function must accept either a value of type @code{int *} to -+comply with POSIX, or a value of type @code{union wait *} to comply with -+the 4.1BSD interface. If @code{wait}'s parameter were @code{void *}, -+@code{wait} would accept both kinds of arguments, but it would also -+accept any other pointer type and this would make argument type checking -+less useful. Instead, @code{} might define the interface -+as follows: ++For example, @smallexample -struct S @@ -4644,113 +6341,224 @@ -@} -S *p = (S *)malloc (sizeof(S) + 100); -p->data[10] = 0; //Bounds violation -+typedef union __attribute__ ((__transparent_union__)) -+ @{ -+ int *__ip; -+ union wait *__up; -+ @} wait_status_ptr_t; -+ -+pid_t wait (wait_status_ptr_t); ++void (__attribute__((noreturn)) ****f) (void); @end smallexample @noindent -By using an attribute for the field we may avoid unwanted bound -violation checks: -+This interface allows either @code{int *} or @code{union wait *} -+arguments to be passed, using the @code{int *} calling convention. -+The program can call @code{wait} with arguments of either type: ++specifies the type ``pointer to pointer to pointer to pointer to ++non-returning function returning @code{void}''. As another example, @smallexample -struct S -+int w1 () @{ int w; return wait (&w); @} -+int w2 () @{ union wait w; return wait (&w); @} -+@end smallexample -+ -+@noindent -+With this interface, @code{wait}'s implementation might look like this: -+ -+@smallexample -+pid_t wait (wait_status_ptr_t p) - @{ +-@{ - int size; - char data[1] __attribute__((bnd_variable_size)); -+ return waitpid (-1, p.__ip, 0); - @} +-@} -S *p = (S *)malloc (sizeof(S) + 100); -p->data[10] = 0; //OK ++char *__attribute__((aligned(8))) *f; @end smallexample -+@item unused -+@cindex @code{unused} type attribute -+When attached to a type (including a @code{union} or a @code{struct}), -+this attribute means that variables of that type are meant to appear -+possibly unused. GCC does not produce a warning for any variables of -+that type, even if the variable appears to do nothing. This is often -+the case with lock or thread classes, which are usually defined and then -+not referenced, but contain constructors and destructors that have -+nontrivial bookkeeping functions. 
-+ -+@item visibility -+@cindex @code{visibility} type attribute -+In C++, attribute visibility (@pxref{Function Attributes}) can also be -+applied to class, struct, union and enum types. Unlike other type -+attributes, the attribute must appear between the initial keyword and -+the name of the type; it cannot appear after the body of the type. -+ -+Note that the type visibility is applied to vague linkage entities -+associated with the class (vtable, typeinfo node, etc.). In -+particular, if a class is thrown as an exception in one shared object -+and caught in another, the class must have default visibility. -+Otherwise the two shared objects are unable to use the same -+typeinfo node and exception handling will break. -+ - @end table - - To specify multiple attributes, separate them by commas within the - double parentheses: for example, @samp{__attribute__ ((aligned (16), - packed))}. - -+@node ARM Type Attributes - @subsection ARM Type Attributes - - @cindex @code{notshared} type attribute, ARM -@@ -6105,7 +6572,7 @@ virtual table for @code{C} is not exported. (You can use - @code{__attribute__} instead of @code{__declspec} if you prefer, but - most Symbian OS code uses @code{__declspec}.) +-@end table ++@noindent ++specifies the type ``pointer to 8-byte-aligned pointer to @code{char}''. ++Note again that this does not work with most attributes; for example, ++the usage of @samp{aligned} and @samp{noreturn} attributes given above ++is not yet supported. + +-To specify multiple attributes, separate them by commas within the +-double parentheses: for example, @samp{__attribute__ ((aligned (16), +-packed))}. ++For compatibility with existing code written for compiler versions that ++did not implement attributes on nested declarators, some laxity is ++allowed in the placing of attributes. If an attribute that only applies ++to types is applied to a declaration, it is treated as applying to ++the type of that declaration. If an attribute that only applies to ++declarations is applied to the type of a declaration, it is treated ++as applying to that declaration; and, for compatibility with code ++placing the attributes immediately before the identifier declared, such ++an attribute applied to a function return type is treated as ++applying to the function type, and such an attribute applied to an array ++element type is treated as applying to the array type. If an ++attribute that only applies to function types is applied to a ++pointer-to-function type, it is treated as applying to the pointer ++target type; if such an attribute is applied to a function return type ++that is not a pointer-to-function type, it is treated as applying ++to the function type. + +-@subsection ARM Type Attributes ++@node Function Prototypes ++@section Prototypes and Old-Style Function Definitions ++@cindex function prototype declarations ++@cindex old-style function definitions ++@cindex promotion of formal parameters + +-@cindex @code{notshared} type attribute, ARM +-On those ARM targets that support @code{dllimport} (such as Symbian +-OS), you can use the @code{notshared} attribute to indicate that the +-virtual table and other similar data for a class should not be +-exported from a DLL@. For example: ++GNU C extends ISO C to allow a function prototype to override a later ++old-style non-prototype definition. 
Consider the following example: + @smallexample +-class __declspec(notshared) C @{ +-public: +- __declspec(dllimport) C(); +- virtual void f(); +-@} +- +-__declspec(dllexport) +-C::C() @{@} +-@end smallexample +- +-@noindent +-In this code, @code{C::C} is exported from the current DLL, but the +-virtual table for @code{C} is not exported. (You can use +-@code{__attribute__} instead of @code{__declspec} if you prefer, but +-most Symbian OS code uses @code{__declspec}.) +- -@anchor{MeP Type Attributes} -+@node MeP Type Attributes - @subsection MeP Type Attributes - - @cindex @code{based} type attribute, MeP -@@ -6117,7 +6584,7 @@ Specifically, the @code{based}, @code{tiny}, @code{near}, and - @code{far} attributes may be applied to either. The @code{io} and - @code{cb} attributes may not be applied to types. +-@subsection MeP Type Attributes +- +-@cindex @code{based} type attribute, MeP +-@cindex @code{tiny} type attribute, MeP +-@cindex @code{near} type attribute, MeP +-@cindex @code{far} type attribute, MeP +-Many of the MeP variable attributes may be applied to types as well. +-Specifically, the @code{based}, @code{tiny}, @code{near}, and +-@code{far} attributes may be applied to either. The @code{io} and +-@code{cb} attributes may not be applied to types. ++/* @r{Use prototypes unless the compiler is old-fashioned.} */ ++#ifdef __STDC__ ++#define P(x) x ++#else ++#define P(x) () ++#endif -@anchor{PowerPC Type Attributes} -+@node PowerPC Type Attributes - @subsection PowerPC Type Attributes +-@subsection PowerPC Type Attributes ++/* @r{Prototype function declaration.} */ ++int isroot P((uid_t)); + +-Three attributes currently are defined for PowerPC configurations: +-@code{altivec}, @code{ms_struct} and @code{gcc_struct}. ++/* @r{Old-style function definition.} */ ++int ++isroot (x) /* @r{??? lossage here ???} */ ++ uid_t x; ++@{ ++ return x == 0; ++@} ++@end smallexample + +-@cindex @code{ms_struct} type attribute, PowerPC +-@cindex @code{gcc_struct} type attribute, PowerPC +-For full documentation of the @code{ms_struct} and @code{gcc_struct} +-attributes please see the documentation in @ref{x86 Type Attributes}. ++Suppose the type @code{uid_t} happens to be @code{short}. ISO C does ++not allow this example, because subword arguments in old-style ++non-prototype definitions are promoted. Therefore in this example the ++function definition's argument is really an @code{int}, which does not ++match the prototype argument type of @code{short}. + +-@cindex @code{altivec} type attribute, PowerPC +-The @code{altivec} attribute allows one to declare AltiVec vector data +-types supported by the AltiVec Programming Interface Manual. The +-attribute requires an argument to specify one of three vector types: +-@code{vector__}, @code{pixel__} (always followed by unsigned short), +-and @code{bool__} (always followed by unsigned). ++This restriction of ISO C makes it hard to write code that is portable ++to traditional C compilers, because the programmer does not know ++whether the @code{uid_t} type is @code{short}, @code{int}, or ++@code{long}. Therefore, in cases like these GNU C allows a prototype ++to override a later old-style definition. More precisely, in GNU C, a ++function prototype argument type overrides the argument type specified ++by a later old-style definition if the former type is the same as the ++latter type before promotion. 
Thus in GNU C the above example is ++equivalent to the following: - Three attributes currently are defined for PowerPC configurations: -@@ -6144,7 +6611,7 @@ __attribute__((altivec(bool__))) unsigned - These attributes mainly are intended to support the @code{__vector}, - @code{__pixel}, and @code{__bool} AltiVec keywords. + @smallexample +-__attribute__((altivec(vector__))) +-__attribute__((altivec(pixel__))) unsigned short +-__attribute__((altivec(bool__))) unsigned +-@end smallexample +- +-These attributes mainly are intended to support the @code{__vector}, +-@code{__pixel}, and @code{__bool} AltiVec keywords. ++int isroot (uid_t); -@anchor{SPU Type Attributes} -+@node SPU Type Attributes - @subsection SPU Type Attributes +-@subsection SPU Type Attributes ++int ++isroot (uid_t x) ++@{ ++ return x == 0; ++@} ++@end smallexample - @cindex @code{spu_vector} type attribute, SPU -@@ -6153,7 +6620,7 @@ allows one to declare vector data types supported by the Sony/Toshiba/IBM SPU - Language Extensions Specification. It is intended to support the - @code{__vector} keyword. +-@cindex @code{spu_vector} type attribute, SPU +-The SPU supports the @code{spu_vector} attribute for types. This attribute +-allows one to declare vector data types supported by the Sony/Toshiba/IBM SPU +-Language Extensions Specification. It is intended to support the +-@code{__vector} keyword. ++@noindent ++GNU C++ does not support old-style function definitions, so this ++extension is irrelevant. -@anchor{x86 Type Attributes} -+@node x86 Type Attributes - @subsection x86 Type Attributes +-@subsection x86 Type Attributes ++@node C++ Comments ++@section C++ Style Comments ++@cindex @code{//} ++@cindex C++ comments ++@cindex comments, C++ style + +-Two attributes are currently defined for x86 configurations: +-@code{ms_struct} and @code{gcc_struct}. ++In GNU C, you may use C++ style comments, which start with @samp{//} and ++continue until the end of the line. Many other C implementations allow ++such comments, and they are included in the 1999 C standard. However, ++C++ style comments are not recognized if you specify an @option{-std} ++option specifying a version of ISO C before C99, or @option{-ansi} ++(equivalent to @option{-std=c90}). - Two attributes are currently defined for x86 configurations: -@@ -8213,15 +8680,19 @@ identifier, or a sequence of member accesses and array references. +-@table @code ++@node Dollar Signs ++@section Dollar Signs in Identifier Names ++@cindex $ ++@cindex dollar signs in identifier names ++@cindex identifier names, dollar signs in + +-@item ms_struct +-@itemx gcc_struct +-@cindex @code{ms_struct} type attribute, x86 +-@cindex @code{gcc_struct} type attribute, x86 ++In GNU C, you may normally use dollar signs in identifier names. ++This is because many traditional C implementations allow such identifiers. ++However, dollar signs in identifiers are not supported on a few target ++machines, typically because the target assembler does not allow them. + +-If @code{packed} is used on a structure, or if bit-fields are used +-it may be that the Microsoft ABI packs them differently +-than GCC normally packs them. Particularly when moving packed +-data between functions compiled with GCC and the native Microsoft compiler +-(either via function call or as data in a file), it may be necessary to access +-either format. 
++@node Character Escapes ++@section The Character @key{ESC} in Constants + +-Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 +-compilers to match the native Microsoft compiler. +-@end table ++You can use the sequence @samp{\e} in a string or character constant to ++stand for the ASCII character @key{ESC}. + + @node Alignment + @section Inquiring on Alignment of Types or Variables +@@ -8213,15 +8702,19 @@ identifier, or a sequence of member accesses and array references. The following built-in functions are intended to be compatible with those described in the @cite{Intel Itanium Processor-specific Application Binary Interface}, @@ -4774,7 +6582,7 @@ Not all operations are supported by all target processors. If a particular operation cannot be implemented on the target processor, a warning is generated and a call to an external function is generated. The external -@@ -8243,11 +8714,10 @@ after the operation. +@@ -8243,11 +8736,10 @@ after the operation. All of the routines are described in the Intel documentation to take ``an optional list of variables protected by the memory barrier''. It's not clear what is meant by that; it could mean that @emph{only} the @@ -4790,7 +6598,7 @@ @table @code @item @var{type} __sync_fetch_and_add (@var{type} *ptr, @var{type} value, ...) -@@ -8350,45 +8820,47 @@ are not prevented from being speculated to before the barrier. +@@ -8350,45 +8842,47 @@ are not prevented from being speculated to before the barrier. @node __atomic Builtins @section Built-in Functions for Memory Model Aware Atomic Operations @@ -4863,7 +6671,7 @@ @table @code @item __ATOMIC_RELAXED -@@ -8403,13 +8875,32 @@ semantic stores from another thread. +@@ -8403,13 +8897,32 @@ semantic stores from another thread. Barrier to sinking of code and synchronizes with acquire (or stronger) semantic loads from another thread. @item __ATOMIC_ACQ_REL @@ -4898,7 +6706,7 @@ When implementing patterns for these built-in functions, the memory model parameter can be ignored as long as the pattern implements the most restrictive @code{__ATOMIC_SEQ_CST} model. Any of the other memory models -@@ -8480,19 +8971,20 @@ of @code{*@var{ptr}} is copied into @code{*@var{ret}}. +@@ -8480,19 +8993,20 @@ of @code{*@var{ptr}} is copied into @code{*@var{ret}}. @deftypefn {Built-in Function} bool __atomic_compare_exchange_n (@var{type} *ptr, @var{type} *expected, @var{type} desired, bool weak, int success_memmodel, int failure_memmodel) This built-in function implements an atomic compare and exchange operation. This compares the contents of @code{*@var{ptr}} with the contents of @@ -4957,49 +6765,175 @@ -tno-android-cc -tno-android-ld} @emph{H8/300 Options} -@@ -12326,8 +12326,12 @@ corresponding flag to the linker. +@@ -12259,7 +12259,10 @@ Generate big-endian code. This is the default when GCC is configured for an + + @item -mgeneral-regs-only + @opindex mgeneral-regs-only +-Generate code which uses only the general registers. ++Generate code which uses only the general-purpose registers. This is equivalent ++to feature modifier @option{nofp} of @option{-march} or @option{-mcpu}, except ++that @option{-mgeneral-regs-only} takes precedence over any conflicting feature ++modifier regardless of sequence. + + @item -mlittle-endian + @opindex mlittle-endian +@@ -12326,20 +12329,26 @@ corresponding flag to the linker. + @opindex march Specify the name of the target architecture, optionally suffixed by one or more feature modifiers. 
This option has the form - @option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the +-@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the -only permissible value for @var{arch} is @samp{armv8-a}. The permissible -values for @var{feature} are documented in the sub-section below. -+only permissible value for @var{arch} is @samp{armv8-a}. -+The permissible values for @var{feature} are documented in the sub-section -+below. Additionally on native AArch64 GNU/Linux systems the value ++@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}. + +-Where conflicting feature modifiers are specified, the right-most feature is +-used. ++The permissible values for @var{arch} are @samp{armv8-a} or ++@samp{armv8.1-a}. + +-GCC uses this name to determine what kind of instructions it can emit when +-generating assembly code. ++For the permissible values for @var{feature}, see the sub-section on ++@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} ++Feature Modifiers}. Where conflicting feature modifiers are ++specified, the right-most feature is used. + +-Where @option{-march} is specified without either of @option{-mtune} +-or @option{-mcpu} also being specified, the code is tuned to perform +-well across a range of target processors implementing the target +-architecture. ++Additionally on native AArch64 GNU/Linux systems the value +@samp{native} is available. This option causes the compiler to pick the +architecture of the host system. If the compiler is unable to recognize the +architecture of the host system this option has no effect. - - Where conflicting feature modifiers are specified, the right-most feature is - used. -@@ -12351,6 +12355,13 @@ Additionally, this option can specify that GCC should tune the performance ++ ++GCC uses @var{name} to determine what kind of instructions it can emit ++when generating assembly code. If @option{-march} is specified ++without either of @option{-mtune} or @option{-mcpu} also being ++specified, the code is tuned to perform well across a range of target ++processors implementing the target architecture. + + @item -mtune=@var{name} + @opindex mtune +@@ -12352,6 +12361,12 @@ Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. Permissible values for this option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}. -+Additionally on native AArch64 GNU/Linux systems the value @samp{native} -+is available. -+This option causes the compiler to pick the architecture of and tune the -+performance of the code for the processor of the host system. -+If the compiler is unable to recognize the processor of the host system -+this option has no effect. ++Additionally on native AArch64 GNU/Linux systems the value ++@samp{native} is available. This option causes the compiler to pick ++the architecture of and tune the performance of the code for the ++processor of the host system. If the compiler is unable to recognize ++the processor of the host system this option has no effect. + Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=} are specified, the code is tuned to perform well across a range of target processors. -@@ -12363,7 +12374,11 @@ Specify the name of the target processor, optionally suffixed by one or more - feature modifiers. 
This option has the form - @option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the - permissible values for @var{cpu} are the same as those available for +@@ -12360,45 +12375,75 @@ This option cannot be suffixed by feature modifiers. + + @item -mcpu=@var{name} + @opindex mcpu +-Specify the name of the target processor, optionally suffixed by one or more +-feature modifiers. This option has the form +-@option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the +-permissible values for @var{cpu} are the same as those available for -@option{-mtune}. -+@option{-mtune}. Additionally on native AArch64 GNU/Linux systems the -+value @samp{native} is available. -+This option causes the compiler to tune the performance of the code for the -+processor of the host system. If the compiler is unable to recognize the -+processor of the host system this option has no effect. - - The permissible values for @var{feature} are documented in the sub-section - below. -@@ -13207,9 +13222,9 @@ Permissible names are: @samp{arm2}, @samp{arm250}, +- +-The permissible values for @var{feature} are documented in the sub-section +-below. +- +-Where conflicting feature modifiers are specified, the right-most feature is +-used. +- +-GCC uses this name to determine what kind of instructions it can emit when ++Specify the name of the target processor, optionally suffixed by one ++or more feature modifiers. This option has the form ++@option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where ++the permissible values for @var{cpu} are the same as those available ++for @option{-mtune}. The permissible values for @var{feature} are ++documented in the sub-section on ++@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} ++Feature Modifiers}. Where conflicting feature modifiers are ++specified, the right-most feature is used. ++ ++Additionally on native AArch64 GNU/Linux systems the value ++@samp{native} is available. This option causes the compiler to tune ++the performance of the code for the processor of the host system. If ++the compiler is unable to recognize the processor of the host system ++this option has no effect. ++ ++GCC uses @var{name} to determine what kind of instructions it can emit when + generating assembly code (as if by @option{-march}) and to determine + the target processor for which to tune for performance (as if + by @option{-mtune}). Where this option is used in conjunction + with @option{-march} or @option{-mtune}, those options take precedence + over the appropriate part of this option. ++ ++@item -moverride=@var{string} ++@opindex moverride ++Override tuning decisions made by the back-end in response to a ++@option{-mtune=} switch. The syntax, semantics, and accepted values ++for @var{string} in this option are not guaranteed to be consistent ++across releases. ++ ++This option is only intended to be useful when developing GCC. + @end table + + @subsubsection @option{-march} and @option{-mcpu} Feature Modifiers ++@anchor{aarch64-feature-modifiers} + @cindex @option{-march} feature modifiers + @cindex @option{-mcpu} feature modifiers +-Feature modifiers used with @option{-march} and @option{-mcpu} can be one +-the following: ++Feature modifiers used with @option{-march} and @option{-mcpu} can be any of ++the following and their inverses @option{no@var{feature}}: + + @table @samp + @item crc + Enable CRC extension. + @item crypto +-Enable Crypto extension. This implies Advanced SIMD is enabled. ++Enable Crypto extension. 
This also enables Advanced SIMD and floating-point ++instructions. + @item fp +-Enable floating-point instructions. ++Enable floating-point instructions. This is on by default for all possible ++values for options @option{-march} and @option{-mcpu}. + @item simd +-Enable Advanced SIMD instructions. This implies floating-point instructions +-are enabled. This is the default for all current possible values for options +-@option{-march} and @option{-mcpu=}. ++Enable Advanced SIMD instructions. This also enables floating-point ++instructions. This is on by default for all possible values for options ++@option{-march} and @option{-mcpu}. ++@item lse ++Enable Large System Extension instructions. ++@item pan ++Enable Privileged Access Never support. ++@item lor ++Enable Limited Ordering Regions support. ++@item rdma ++Enable ARMv8.1 Advanced SIMD instructions. This implies Advanced SIMD ++is enabled. ++ + @end table + ++That is, @option{crypto} implies @option{simd} implies @option{fp}. ++Conversely, @option{nofp} (or equivalently, @option{-mgeneral-regs-only}) ++implies @option{nosimd} implies @option{nocrypto}. ++ + @node Adapteva Epiphany Options + @subsection Adapteva Epiphany Options + +@@ -13169,7 +13214,7 @@ of the @option{-mcpu=} option. Permissible names are: @samp{armv2}, + @samp{armv2a}, @samp{armv3}, @samp{armv3m}, @samp{armv4}, @samp{armv4t}, + @samp{armv5}, @samp{armv5t}, @samp{armv5e}, @samp{armv5te}, + @samp{armv6}, @samp{armv6j}, +-@samp{armv6t2}, @samp{armv6z}, @samp{armv6zk}, @samp{armv6-m}, ++@samp{armv6t2}, @samp{armv6z}, @samp{armv6kz}, @samp{armv6-m}, + @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, @samp{armv7e-m}, + @samp{armv7ve}, @samp{armv8-a}, @samp{armv8-a+crc}, + @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. +@@ -13208,9 +13253,9 @@ Permissible names are: @samp{arm2}, @samp{arm250}, @samp{arm10e}, @samp{arm1020e}, @samp{arm1022e}, @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, @@ -5012,7 +6946,7 @@ @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, @samp{cortex-r7}, @samp{cortex-m7}, @samp{cortex-m4}, -@@ -13229,8 +13244,8 @@ Permissible names are: @samp{arm2}, @samp{arm250}, +@@ -13230,8 +13275,8 @@ Permissible names are: @samp{arm2}, @samp{arm250}, Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. Permissible names are: @@ -5023,7 +6957,18 @@ @option{-mtune=generic-@var{arch}} specifies that GCC should tune the performance for a blend of processors within architecture @var{arch}. -@@ -15324,13 +15339,19 @@ These @samp{-m} options are defined for GNU/Linux targets: +@@ -13397,6 +13442,10 @@ that executes in ARM state, but the default can be changed by + configuring GCC with the @option{--with-mode=}@var{state} + configure option. + ++You can also override the ARM and Thumb mode for each function ++by using the @code{target("thumb")} and @code{target("arm")} function attributes ++(@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). ++ + @item -mtpcs-frame + @opindex mtpcs-frame + Generate a stack frame that is compliant with the Thumb Procedure Call +@@ -15325,13 +15374,19 @@ These @samp{-m} options are defined for GNU/Linux targets: @item -mglibc @opindex mglibc Use the GNU C library. 
This is the default except @@ -5063,6 +7008,18 @@ @deftypefnx {GIMPLE function} tree gimple_convert (gimple_seq *, location_t, tree, tree); @end deftypefn +--- a/src/gcc/doc/sourcebuild.texi ++++ b/src/gcc/doc/sourcebuild.texi +@@ -1695,6 +1695,9 @@ Target supports FPU instructions. + @item non_strict_align + Target does not require strict alignment. + ++@item sqrt_insn ++Target has a square root instruction that the compiler can generate. ++ + @item sse + Target supports compiling @code{sse} instructions. + --- a/src/gcc/doc/tm.texi +++ b/src/gcc/doc/tm.texi @@ -9789,7 +9789,7 @@ be documented in @file{extend.texi}. diff -u gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff --- gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff +++ gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff @@ -88,8 +88,8 @@ =================================================================== --- a/src/gcc/LINARO-VERSION +++ /dev/null -@@ -1 +0,0 @@ --5.1-2015.07~dev +@@ -1,1 +0,0 @@ +-Snapshot 5.2-2015.10 Index: b/src/gcc/configure.ac =================================================================== --- a/src/gcc/configure.ac diff -u gcc-5-5.2.1/debian/patches/gcc-linaro.diff gcc-5-5.2.1/debian/patches/gcc-linaro.diff --- gcc-5-5.2.1/debian/patches/gcc-linaro.diff +++ gcc-5-5.2.1/debian/patches/gcc-linaro.diff @@ -1,6 +1,6 @@ -# DP: Changes for the Linaro 5-2015.09 release. +# DP: Changes for the Linaro 5-2015.10 release. -LANG=C git diff 2006973fa839ccbe189a1e7408400dc96ed880b4..ac19ac6481a3f326d9f41403f5dadab548b2c8a6 \ +LANG=C git diff 472e2599b141820b2a1565209528750de18731f8..5db159c220ec010ab6ae331802cddc242f83bb38 \ | egrep -v '^(diff|index) ' \ | filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ @@ -19,7 +19,7 @@ --- a/src//dev/null +++ b/src/gcc/LINARO-VERSION @@ -0,0 +1 @@ -+5.1-2015.07~dev ++Snapshot 5.2-2015.10 --- a/src/gcc/Makefile.in +++ b/src/gcc/Makefile.in @@ -527,10 +527,6 @@ xm_include_list=@xm_include_list@ @@ -87,6 +87,66 @@ ada.clean: ada.distclean: -$(RM) ada/Makefile +--- a/src/gcc/builtins.c ++++ b/src/gcc/builtins.c +@@ -5477,7 +5477,8 @@ expand_builtin_atomic_compare_exchange (machine_mode mode, tree exp, + the normal case where EXPECT is totally private, i.e. a register. At + which point the store can be unconditional. */ + label = gen_label_rtx (); +- emit_cmp_and_jump_insns (target, const0_rtx, NE, NULL, VOIDmode, 1, label); ++ emit_cmp_and_jump_insns (target, const0_rtx, NE, NULL, ++ GET_MODE (target), 1, label); + emit_move_insn (expect, oldval); + emit_label (label); + +--- a/src/gcc/c-family/c-common.h ++++ b/src/gcc/c-family/c-common.h +@@ -1084,6 +1084,8 @@ extern const unsigned char executable_checksum[16]; + /* In c-cppbuiltin.c */ + extern void builtin_define_std (const char *macro); + extern void builtin_define_with_value (const char *, const char *, int); ++extern void builtin_define_with_int_value (const char *, HOST_WIDE_INT); ++extern void builtin_define_type_sizeof (const char *, tree); + extern void c_stddef_cpp_builtins (void); + extern void fe_file_change (const struct line_map *); + extern void c_parse_error (const char *, enum cpp_ttype, tree, unsigned char); +--- a/src/gcc/c-family/c-cppbuiltin.c ++++ b/src/gcc/c-family/c-cppbuiltin.c +@@ -58,8 +58,6 @@ along with GCC; see the file COPYING3. If not see + #endif + + /* Non-static as some targets don't use it. 
*/ +-void builtin_define_std (const char *) ATTRIBUTE_UNUSED; +-static void builtin_define_with_int_value (const char *, HOST_WIDE_INT); + static void builtin_define_with_hex_fp_value (const char *, tree, + int, const char *, + const char *, +@@ -68,7 +66,6 @@ static void builtin_define_stdint_macros (void); + static void builtin_define_constants (const char *, tree); + static void builtin_define_type_max (const char *, tree); + static void builtin_define_type_minmax (const char *, const char *, tree); +-static void builtin_define_type_sizeof (const char *, tree); + static void builtin_define_float_constants (const char *, + const char *, + const char *, +@@ -113,7 +110,7 @@ mode_has_fma (machine_mode mode) + } + + /* Define NAME with value TYPE size_unit. */ +-static void ++void + builtin_define_type_sizeof (const char *name, tree type) + { + builtin_define_with_int_value (name, +@@ -1372,7 +1369,7 @@ builtin_define_with_value (const char *macro, const char *expansion, int is_str) + + + /* Pass an object-like macro and an integer value to define it to. */ +-static void ++void + builtin_define_with_int_value (const char *macro, HOST_WIDE_INT value) + { + char *buf; --- a/src/gcc/c/Make-lang.in +++ b/src/gcc/c/Make-lang.in @@ -95,6 +95,8 @@ c.srcman: @@ -98,6 +158,85 @@ # 'make check' in gcc/ looks for check-c. Redirect it to check-gcc. check-c : check-gcc +--- a/src/gcc/c/c-decl.c ++++ b/src/gcc/c/c-decl.c +@@ -2632,6 +2632,12 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) + else if (DECL_PRESERVE_P (newdecl)) + DECL_PRESERVE_P (olddecl) = 1; + ++ /* Merge DECL_COMMON */ ++ if (VAR_P (olddecl) && VAR_P (newdecl) ++ && !lookup_attribute ("common", DECL_ATTRIBUTES (newdecl)) ++ && !lookup_attribute ("nocommon", DECL_ATTRIBUTES (newdecl))) ++ DECL_COMMON (newdecl) = DECL_COMMON (newdecl) && DECL_COMMON (olddecl); ++ + /* Copy most of the decl-specific fields of NEWDECL into OLDDECL. + But preserve OLDDECL's DECL_UID, DECL_CONTEXT and + DECL_ARGUMENTS (if appropriate). */ +@@ -7524,12 +7530,23 @@ detect_field_duplicates (tree fieldlist) + /* Finish up struct info used by -Wc++-compat. */ + + static void +-warn_cxx_compat_finish_struct (tree fieldlist) ++warn_cxx_compat_finish_struct (tree fieldlist, enum tree_code code, ++ location_t record_loc) + { + unsigned int ix; + tree x; + struct c_binding *b; + ++ if (fieldlist == NULL_TREE) ++ { ++ if (code == RECORD_TYPE) ++ warning_at (record_loc, OPT_Wc___compat, ++ "empty struct has size 0 in C, size 1 in C++"); ++ else ++ warning_at (record_loc, OPT_Wc___compat, ++ "empty union has size 0 in C, size 1 in C++"); ++ } ++ + /* Set the C_TYPE_DEFINED_IN_STRUCT flag for each type defined in + the current struct. 
We do this now at the end of the struct + because the flag is used to issue visibility warnings, and we +@@ -7862,7 +7879,7 @@ finish_struct (location_t loc, tree t, tree fieldlist, tree attributes, + DECL_EXPR, build_decl (loc, TYPE_DECL, NULL, t))); + + if (warn_cxx_compat) +- warn_cxx_compat_finish_struct (fieldlist); ++ warn_cxx_compat_finish_struct (fieldlist, TREE_CODE (t), loc); + + struct_parse_info->struct_types.release (); + struct_parse_info->fields.release (); +--- a/src/gcc/cfgexpand.c ++++ b/src/gcc/cfgexpand.c +@@ -1382,7 +1382,16 @@ expand_one_var (tree var, bool toplevel, bool really_expand) + else + { + if (really_expand) +- expand_one_stack_var (origvar); ++ { ++ if (lookup_attribute ("naked", ++ DECL_ATTRIBUTES (current_function_decl))) ++ error ("cannot allocate stack for variable %q+D, naked function.", ++ var); ++ ++ expand_one_stack_var (origvar); ++ } ++ ++ + return tree_to_uhwi (DECL_SIZE_UNIT (var)); + } + return 0; +--- a/src/gcc/cgraphunit.c ++++ b/src/gcc/cgraphunit.c +@@ -2505,6 +2505,7 @@ cgraph_node::create_wrapper (cgraph_node *target) + memset (&thunk, 0, sizeof (cgraph_thunk_info)); + thunk.thunk_p = true; + create_edge (target, NULL, count, CGRAPH_FREQ_BASE); ++ callees->can_throw_external = !TREE_NOTHROW (target->decl); + + tree arguments = DECL_ARGUMENTS (decl); + --- a/src/gcc/combine.c +++ b/src/gcc/combine.c @@ -1650,6 +1650,73 @@ setup_incoming_promotions (rtx_insn *first) @@ -234,6 +373,110 @@ /* Don't eliminate a store in the stack pointer. */ if (dest == stack_pointer_rtx /* Don't combine with an insn that sets a register to itself if it has +@@ -5463,6 +5511,51 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest, + SUBST (XEXP (x, 1), temp); + } + ++ /* Try to fold this expression in case we have constants that weren't ++ present before. */ ++ temp = 0; ++ switch (GET_RTX_CLASS (code)) ++ { ++ case RTX_UNARY: ++ if (op0_mode == VOIDmode) ++ op0_mode = GET_MODE (XEXP (x, 0)); ++ temp = simplify_unary_operation (code, mode, XEXP (x, 0), op0_mode); ++ break; ++ case RTX_COMPARE: ++ case RTX_COMM_COMPARE: ++ { ++ machine_mode cmp_mode = GET_MODE (XEXP (x, 0)); ++ if (cmp_mode == VOIDmode) ++ { ++ cmp_mode = GET_MODE (XEXP (x, 1)); ++ if (cmp_mode == VOIDmode) ++ cmp_mode = op0_mode; ++ } ++ temp = simplify_relational_operation (code, mode, cmp_mode, ++ XEXP (x, 0), XEXP (x, 1)); ++ } ++ break; ++ case RTX_COMM_ARITH: ++ case RTX_BIN_ARITH: ++ temp = simplify_binary_operation (code, mode, XEXP (x, 0), XEXP (x, 1)); ++ break; ++ case RTX_BITFIELD_OPS: ++ case RTX_TERNARY: ++ temp = simplify_ternary_operation (code, mode, op0_mode, XEXP (x, 0), ++ XEXP (x, 1), XEXP (x, 2)); ++ break; ++ default: ++ break; ++ } ++ ++ if (temp) ++ { ++ x = temp; ++ code = GET_CODE (temp); ++ op0_mode = VOIDmode; ++ mode = GET_MODE (temp); ++ } ++ + /* If this is a simple operation applied to an IF_THEN_ELSE, try + applying it to the arms of the IF_THEN_ELSE. This often simplifies + things. Check for cases where both arms are testing the same +@@ -5562,51 +5655,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest, + } + } + +- /* Try to fold this expression in case we have constants that weren't +- present before. 
*/ +- temp = 0; +- switch (GET_RTX_CLASS (code)) +- { +- case RTX_UNARY: +- if (op0_mode == VOIDmode) +- op0_mode = GET_MODE (XEXP (x, 0)); +- temp = simplify_unary_operation (code, mode, XEXP (x, 0), op0_mode); +- break; +- case RTX_COMPARE: +- case RTX_COMM_COMPARE: +- { +- machine_mode cmp_mode = GET_MODE (XEXP (x, 0)); +- if (cmp_mode == VOIDmode) +- { +- cmp_mode = GET_MODE (XEXP (x, 1)); +- if (cmp_mode == VOIDmode) +- cmp_mode = op0_mode; +- } +- temp = simplify_relational_operation (code, mode, cmp_mode, +- XEXP (x, 0), XEXP (x, 1)); +- } +- break; +- case RTX_COMM_ARITH: +- case RTX_BIN_ARITH: +- temp = simplify_binary_operation (code, mode, XEXP (x, 0), XEXP (x, 1)); +- break; +- case RTX_BITFIELD_OPS: +- case RTX_TERNARY: +- temp = simplify_ternary_operation (code, mode, op0_mode, XEXP (x, 0), +- XEXP (x, 1), XEXP (x, 2)); +- break; +- default: +- break; +- } +- +- if (temp) +- { +- x = temp; +- code = GET_CODE (temp); +- op0_mode = VOIDmode; +- mode = GET_MODE (temp); +- } +- + /* First see if we can apply the inverse distributive law. */ + if (code == PLUS || code == MINUS + || code == AND || code == IOR || code == XOR) @@ -7723,9 +7771,8 @@ extract_left_shift (rtx x, int count) We try, as much as possible, to re-use rtl expressions to save memory. @@ -280,6 +523,15 @@ } --- a/src/gcc/config.gcc +++ b/src/gcc/config.gcc +@@ -302,7 +302,7 @@ m32c*-*-*) + aarch64*-*-*) + cpu_type=aarch64 + extra_headers="arm_neon.h arm_acle.h" +- extra_objs="aarch64-builtins.o aarch-common.o" ++ extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o" + target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" + target_has_targetm_common=yes + ;; @@ -575,7 +575,7 @@ case ${target} in esac @@ -299,6 +551,15 @@ *) tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC" ;; +@@ -3484,7 +3487,7 @@ case "${target}" in + + eval "val=\$with_$which" + base_val=`echo $val | sed -e 's/\+.*//'` +- ext_val=`echo $val | sed -e 's/[a-z0-9\-]\+//'` ++ ext_val=`echo $val | sed -e 's/[a-z0-9.-]\+//'` + + if [ $which = arch ]; then + def=aarch64-arches.def --- a/src/gcc/config.host +++ b/src/gcc/config.host @@ -99,6 +99,14 @@ case ${host} in @@ -316,6 +577,106 @@ arm*-*-freebsd* | arm*-*-linux*) case ${target} in arm*-*-*) +--- a/src/gcc/config/aarch64/aarch64-arches.def ++++ b/src/gcc/config/aarch64/aarch64-arches.def +@@ -27,3 +27,4 @@ + the flags implied by the architecture. */ + + AARCH64_ARCH("armv8-a", generic, 8, AARCH64_FL_FOR_ARCH8) ++AARCH64_ARCH("armv8.1-a", generic, 8, AARCH64_FL_FOR_ARCH8_1) +--- a/src/gcc/config/aarch64/aarch64-builtins.c ++++ b/src/gcc/config/aarch64/aarch64-builtins.c +@@ -133,7 +133,9 @@ enum aarch64_type_qualifiers + /* Polynomial types. */ + qualifier_poly = 0x100, + /* Lane indices - must be in range, and flipped for bigendian. */ +- qualifier_lane_index = 0x200 ++ qualifier_lane_index = 0x200, ++ /* Lane indices for single lane structure loads and stores. 
*/ ++ qualifier_struct_load_store_lane_index = 0x400 + }; + + typedef struct +@@ -235,7 +237,7 @@ aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + static enum aarch64_type_qualifiers + aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_const_pointer_map_mode, +- qualifier_none, qualifier_none }; ++ qualifier_none, qualifier_struct_load_store_lane_index }; + #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers) + + static enum aarch64_type_qualifiers +@@ -267,7 +269,7 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + static enum aarch64_type_qualifiers + aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, +- qualifier_none, qualifier_none }; ++ qualifier_none, qualifier_struct_load_store_lane_index }; + #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) + + #define CF0(N, X) CODE_FOR_aarch64_##N##X +@@ -883,12 +885,14 @@ typedef enum + SIMD_ARG_COPY_TO_REG, + SIMD_ARG_CONSTANT, + SIMD_ARG_LANE_INDEX, ++ SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, + SIMD_ARG_STOP + } builtin_simd_arg; + + static rtx + aarch64_simd_expand_args (rtx target, int icode, int have_retval, +- tree exp, builtin_simd_arg *args) ++ tree exp, builtin_simd_arg *args, ++ enum machine_mode builtin_mode) + { + rtx pat; + rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */ +@@ -927,6 +931,19 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, + op[opc] = copy_to_mode_reg (mode, op[opc]); + break; + ++ case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX: ++ gcc_assert (opc > 1); ++ if (CONST_INT_P (op[opc])) ++ { ++ aarch64_simd_lane_bounds (op[opc], 0, ++ GET_MODE_NUNITS (builtin_mode), ++ exp); ++ /* Keep to GCC-vector-extension lane indices in the RTL. */ ++ op[opc] = ++ GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc]))); ++ } ++ goto constant_arg; ++ + case SIMD_ARG_LANE_INDEX: + /* Must be a previous operand into which this is an index. */ + gcc_assert (opc > 0); +@@ -941,6 +958,7 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, + /* Fall through - if the lane index isn't a constant then + the next case will error. */ + case SIMD_ARG_CONSTANT: ++constant_arg: + if (!(*insn_data[icode].operand[opc].predicate) + (op[opc], mode)) + { +@@ -1049,6 +1067,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) + + if (d->qualifiers[qualifiers_k] & qualifier_lane_index) + args[k] = SIMD_ARG_LANE_INDEX; ++ else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) ++ args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = SIMD_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) +@@ -1072,7 +1092,7 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) + /* The interface to aarch64_simd_expand_args expects a 0 if + the function is void, and a 1 if it is not. 
*/ + return aarch64_simd_expand_args +- (target, icode, !is_void, exp, &args[1]); ++ (target, icode, !is_void, exp, &args[1], d->mode); + } + + rtx --- a/src/gcc/config/aarch64/aarch64-cores.def +++ b/src/gcc/config/aarch64/aarch64-cores.def @@ -21,7 +21,7 @@ @@ -349,8 +710,8 @@ -AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1) +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") -+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08") -+AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x53", "0x001") ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08") ++AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001") +AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") +AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") @@ -359,7 +720,7 @@ -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") -+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08.0xd03") ++AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03") --- a/src/gcc/config/aarch64/aarch64-cost-tables.h +++ b/src/gcc/config/aarch64/aarch64-cost-tables.h @@ -83,7 +83,9 @@ const struct cpu_cost_table thunderx_extra_costs = @@ -385,6 +746,47 @@ #ifdef HAVE_AS_MABI_OPTION #define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}" +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/aarch64-fusion-pairs.def +@@ -0,0 +1,38 @@ ++/* Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++/* Pairs of instructions which can be fused. before including this file, ++ define a macro: ++ ++ AARCH64_FUSION_PAIR (name, internal_name, index_bit) ++ ++ Where: ++ ++ NAME is a string giving a friendly name for the instructions to fuse. ++ INTERNAL_NAME gives the internal name suitable for appending to ++ AARCH64_FUSE_ to give an enum name. ++ INDEX_BIT is the bit to set in the bitmask of supported fusion ++ operations. 
*/ ++ ++AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK, 0) ++AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD, 1) ++AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK, 2) ++AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR, 3) ++AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH, 4) ++ --- a/src/gcc/config/aarch64/aarch64-linux.h +++ b/src/gcc/config/aarch64/aarch64-linux.h @@ -23,6 +23,9 @@ @@ -399,7 +801,7 @@ --- a/src/gcc/config/aarch64/aarch64-option-extensions.def +++ b/src/gcc/config/aarch64/aarch64-option-extensions.def -@@ -21,18 +21,25 @@ +@@ -21,18 +21,29 @@ Before using #include to read this file, define a macro: @@ -427,10 +829,14 @@ -AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) -AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) -AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) -+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp") -+AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd") -+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2") ++AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp") ++AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd") ++AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2") +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32") ++AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, AARCH64_FL_LSE, "lse") ++AARCH64_OPT_EXTENSION("pan", AARCH64_FL_PAN, AARCH64_FL_PAN, "pan") ++AARCH64_OPT_EXTENSION("lor", AARCH64_FL_LOR, AARCH64_FL_LOR, "lor") ++AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA | AARCH64_FL_FPSIMD, AARCH64_FL_RDMA, "rdma") --- a/src/gcc/config/aarch64/aarch64-opts.h +++ b/src/gcc/config/aarch64/aarch64-opts.h @@ -25,7 +25,7 @@ @@ -444,7 +850,7 @@ #undef AARCH64_CORE --- a/src/gcc/config/aarch64/aarch64-protos.h +++ b/src/gcc/config/aarch64/aarch64-protos.h -@@ -162,12 +162,20 @@ struct cpu_vector_cost +@@ -162,26 +162,78 @@ struct cpu_vector_cost const int cond_not_taken_branch_cost; /* Cost of not taken branch. 
*/ }; @@ -457,21 +863,77 @@ + struct tune_params { - const struct cpu_cost_table *const insn_extra_cost; - const struct cpu_addrcost_table *const addr_cost; - const struct cpu_regmove_cost *const regmove_cost; - const struct cpu_vector_cost *const vec_costs; -+ const struct cpu_branch_cost *const branch_costs; - const int memmov_cost; - const int issue_rate; - const unsigned int fuseable_ops; -@@ -177,11 +185,14 @@ struct tune_params - const int int_reassoc_width; - const int fp_reassoc_width; - const int vec_reassoc_width; -+ const int min_div_recip_mul_sf; -+ const int min_div_recip_mul_df; +- const struct cpu_cost_table *const insn_extra_cost; +- const struct cpu_addrcost_table *const addr_cost; +- const struct cpu_regmove_cost *const regmove_cost; +- const struct cpu_vector_cost *const vec_costs; +- const int memmov_cost; +- const int issue_rate; +- const unsigned int fuseable_ops; +- const int function_align; +- const int jump_align; +- const int loop_align; +- const int int_reassoc_width; +- const int fp_reassoc_width; +- const int vec_reassoc_width; ++ const struct cpu_cost_table *insn_extra_cost; ++ const struct cpu_addrcost_table *addr_cost; ++ const struct cpu_regmove_cost *regmove_cost; ++ const struct cpu_vector_cost *vec_costs; ++ const struct cpu_branch_cost *branch_costs; ++ int memmov_cost; ++ int issue_rate; ++ unsigned int fusible_ops; ++ int function_align; ++ int jump_align; ++ int loop_align; ++ int int_reassoc_width; ++ int fp_reassoc_width; ++ int vec_reassoc_width; ++ int min_div_recip_mul_sf; ++ int min_div_recip_mul_df; ++ unsigned int extra_tuning_flags; ++}; ++ ++#define AARCH64_FUSION_PAIR(x, name, index) \ ++ AARCH64_FUSE_##name = (1 << index), ++/* Supported fusion operations. */ ++enum aarch64_fusion_pairs ++{ ++ AARCH64_FUSE_NOTHING = 0, ++#include "aarch64-fusion-pairs.def" ++ ++/* Hacky macro to build AARCH64_FUSE_ALL. The sequence below expands ++ to: ++ AARCH64_FUSE_ALL = 0 | AARCH64_FUSE_index1 | AARCH64_FUSE_index2 ... */ ++#undef AARCH64_FUSION_PAIR ++#define AARCH64_FUSION_PAIR(x, name, y) \ ++ | AARCH64_FUSE_##name ++ ++ AARCH64_FUSE_ALL = 0 ++#include "aarch64-fusion-pairs.def" }; ++#undef AARCH64_FUSION_PAIR ++ ++#define AARCH64_EXTRA_TUNING_OPTION(x, name, index) \ ++ AARCH64_EXTRA_TUNE_##name = (1 << index), ++/* Supported tuning flags. */ ++enum aarch64_extra_tuning_flags ++{ ++ AARCH64_EXTRA_TUNE_NONE = 0, ++#include "aarch64-tuning-flags.def" ++ ++/* Hacky macro to build the "all" flag mask. ++ Expands to 0 | AARCH64_TUNE_index0 | AARCH64_TUNE_index1 , etc. 
*/ ++#undef AARCH64_EXTRA_TUNING_OPTION ++#define AARCH64_EXTRA_TUNING_OPTION(x, name, y) \ ++ | AARCH64_EXTRA_TUNE_##name ++ AARCH64_EXTRA_TUNE_ALL = 0 ++#include "aarch64-tuning-flags.def" ++}; ++#undef AARCH64_EXTRA_TUNING_OPTION ++ ++extern struct tune_params aarch64_tune_params; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); int aarch64_get_condition_code (rtx); @@ -480,7 +942,15 @@ enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); -@@ -264,12 +275,6 @@ void init_aarch64_simd_builtins (void); +@@ -248,6 +300,7 @@ unsigned aarch64_dbx_register_number (unsigned); + unsigned aarch64_trampoline_size (void); + void aarch64_asm_output_labelref (FILE *, const char *); + void aarch64_elf_asm_named_section (const char *, unsigned, tree); ++void aarch64_err_no_fpadvsimd (machine_mode, const char *); + void aarch64_expand_epilogue (bool); + void aarch64_expand_mov_immediate (rtx, rtx); + void aarch64_expand_prologue (void); +@@ -264,12 +317,6 @@ void init_aarch64_simd_builtins (void); void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int); @@ -493,6 +963,45 @@ /* Expand builtins for SIMD intrinsics. */ rtx aarch64_simd_expand_builtin (int, tree, rtx); +@@ -295,6 +342,10 @@ rtx aarch64_load_tp (rtx); + + void aarch64_expand_compare_and_swap (rtx op[]); + void aarch64_split_compare_and_swap (rtx op[]); ++void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx); ++ ++bool aarch64_atomic_ldop_supported_p (enum rtx_code); ++void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx); + void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); + + bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE); +--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def ++++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def +@@ -88,9 +88,9 @@ + BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0) + BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0) + /* Implemented by aarch64_ld_lane. */ +- BUILTIN_VQ (LOADSTRUCT_LANE, ld2_lane, 0) +- BUILTIN_VQ (LOADSTRUCT_LANE, ld3_lane, 0) +- BUILTIN_VQ (LOADSTRUCT_LANE, ld4_lane, 0) ++ BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld2_lane, 0) ++ BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld3_lane, 0) ++ BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld4_lane, 0) + /* Implemented by aarch64_st. */ + BUILTIN_VDC (STORESTRUCT, st2, 0) + BUILTIN_VDC (STORESTRUCT, st3, 0) +@@ -100,9 +100,9 @@ + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) + +- BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0) +- BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0) +- BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0) ++ BUILTIN_VALLDIF (STORESTRUCT_LANE, st2_lane, 0) ++ BUILTIN_VALLDIF (STORESTRUCT_LANE, st3_lane, 0) ++ BUILTIN_VALLDIF (STORESTRUCT_LANE, st4_lane, 0) + + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) --- a/src/gcc/config/aarch64/aarch64-simd.md +++ b/src/gcc/config/aarch64/aarch64-simd.md @@ -2057,13 +2057,13 @@ @@ -555,7 +1064,23 @@ "TARGET_SIMD" { emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], -@@ -3955,6 +3955,7 @@ +@@ -3919,10 +3919,13 @@ + (unspec:OI [(match_operand: 1 "aarch64_simd_struct_operand" "Utv") + (match_operand:OI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] + UNSPEC_LD2_LANE))] + "TARGET_SIMD" +- "ld2\\t{%S0. 
- %T0.}[%3], %1" ++ { ++ operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); ++ return "ld2\\t{%S0. - %T0.}[%3], %1"; ++ } + [(set_attr "type" "neon_load2_one_lane")] + ) + +@@ -3955,15 +3958,19 @@ [(set_attr "type" "neon_store2_2reg")] ) @@ -563,19 +1088,39 @@ (define_insn "vec_store_lanesoi_lane" [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") (unspec: [(match_operand:OI 1 "register_operand" "w") -@@ -3962,7 +3963,10 @@ +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_ST2_LANE))] +- UNSPEC_ST2_LANE))] ++ UNSPEC_ST2_LANE))] "TARGET_SIMD" - "st2\\t{%S1. - %T1.}[%2], %0" +- [(set_attr "type" "neon_store3_one_lane")] + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "st2\\t{%S1. - %T1.}[%2], %0"; + } - [(set_attr "type" "neon_store3_one_lane")] ++ [(set_attr "type" "neon_store2_one_lane")] + ) + + (define_expand "vec_store_lanesoi" +@@ -4010,10 +4017,13 @@ + (unspec:CI [(match_operand: 1 "aarch64_simd_struct_operand" "Utv") + (match_operand:CI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD3_LANE))] + "TARGET_SIMD" +- "ld3\\t{%S0. - %U0.}[%3], %1" ++{ ++ operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); ++ return "ld3\\t{%S0. - %U0.}[%3], %1"; ++} + [(set_attr "type" "neon_load3_one_lane")] ) -@@ -4046,6 +4050,7 @@ +@@ -4046,14 +4056,18 @@ [(set_attr "type" "neon_store3_3reg")] ) @@ -583,9 +1128,11 @@ (define_insn "vec_store_lanesci_lane" [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") (unspec: [(match_operand:CI 1 "register_operand" "w") -@@ -4053,7 +4058,10 @@ +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_ST3_LANE))] +- UNSPEC_ST3_LANE))] ++ UNSPEC_ST3_LANE))] "TARGET_SIMD" - "st3\\t{%S1. - %U1.}[%2], %0" + { @@ -595,7 +1142,23 @@ [(set_attr "type" "neon_store3_one_lane")] ) -@@ -4137,6 +4145,7 @@ +@@ -4101,10 +4115,13 @@ + (unspec:XI [(match_operand: 1 "aarch64_simd_struct_operand" "Utv") + (match_operand:XI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD4_LANE))] + "TARGET_SIMD" +- "ld4\\t{%S0. - %V0.}[%3], %1" ++{ ++ operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); ++ return "ld4\\t{%S0. - %V0.}[%3], %1"; ++} + [(set_attr "type" "neon_load4_one_lane")] + ) + +@@ -4137,14 +4154,18 @@ [(set_attr "type" "neon_store4_4reg")] ) @@ -603,9 +1166,11 @@ (define_insn "vec_store_lanesxi_lane" [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") (unspec: [(match_operand:XI 1 "register_operand" "w") -@@ -4144,7 +4153,10 @@ +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_ST4_LANE))] +- UNSPEC_ST4_LANE))] ++ UNSPEC_ST4_LANE))] "TARGET_SIMD" - "st4\\t{%S1. 
- %V1.}[%2], %0" + { @@ -615,41 +1180,253 @@ [(set_attr "type" "neon_store4_one_lane")] ) +@@ -4554,14 +4575,12 @@ + (match_operand:DI 1 "register_operand" "w") + (match_operand:OI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + +- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), +- NULL); + emit_insn (gen_aarch64_vec_load_lanesoi_lane (operands[0], + mem, + operands[2], +@@ -4574,14 +4593,12 @@ + (match_operand:DI 1 "register_operand" "w") + (match_operand:CI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + +- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), +- NULL); + emit_insn (gen_aarch64_vec_load_lanesci_lane (operands[0], + mem, + operands[2], +@@ -4594,14 +4611,12 @@ + (match_operand:DI 1 "register_operand" "w") + (match_operand:XI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + +- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), +- NULL); + emit_insn (gen_aarch64_vec_load_lanesxi_lane (operands[0], + mem, + operands[2], +@@ -4838,54 +4853,45 @@ + DONE; + }) + +-(define_expand "aarch64_st2_lane" ++(define_expand "aarch64_st2_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:OI 1 "register_operand" "w") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); +- operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + +- emit_insn (gen_vec_store_lanesoi_lane (mem, +- operands[1], +- operands[2])); ++ emit_insn (gen_vec_store_lanesoi_lane (mem, operands[1], operands[2])); + DONE; + }) + +-(define_expand "aarch64_st3_lane" ++(define_expand "aarch64_st3_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:CI 1 "register_operand" "w") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); +- operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + +- emit_insn (gen_vec_store_lanesci_lane (mem, +- operands[1], +- operands[2])); ++ emit_insn (gen_vec_store_lanesci_lane (mem, operands[1], operands[2])); + DONE; + }) + +-(define_expand "aarch64_st4_lane" ++(define_expand "aarch64_st4_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:XI 1 "register_operand" "w") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); +- operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + +- emit_insn (gen_vec_store_lanesxi_lane (mem, 
+- operands[1], +- operands[2])); ++ emit_insn (gen_vec_store_lanesxi_lane (mem, operands[1], operands[2])); + DONE; + }) + +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/aarch64-tuning-flags.def +@@ -0,0 +1,34 @@ ++/* Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++/* Additional control over certain tuning parameters. Before including ++ this file, define a macro: ++ ++ AARCH64_EXTRA_TUNING_OPTION (name, internal_name, index_bit) ++ ++ Where: ++ ++ NAME is a string giving a friendly name for the tuning flag. ++ INTERNAL_NAME gives the internal name suitable for appending to ++ AARCH64_TUNE_ to give an enum name. ++ INDEX_BIT is the bit to set in the bitmask of supported tuning ++ flags. */ ++ ++AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS, 0) ++ --- a/src/gcc/config/aarch64/aarch64.c +++ b/src/gcc/config/aarch64/aarch64.c -@@ -339,12 +339,20 @@ static const struct cpu_vector_cost xgene1_vector_cost = - #define AARCH64_FUSE_ADRP_LDR (1 << 3) - #define AARCH64_FUSE_CMP_BRANCH (1 << 4) +@@ -95,6 +95,7 @@ + #include "rtl-iter.h" + #include "tm-constrs.h" + #include "sched-int.h" ++#include "cortex-a57-fma-steering.h" + + /* Defined for convenience. */ + #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) +@@ -177,15 +178,42 @@ unsigned aarch64_architecture_version; + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; + +-/* The current tuning set. */ +-const struct tune_params *aarch64_tune_params; +- + /* Mask to specify which instructions we are allowed to generate. */ + unsigned long aarch64_isa_flags = 0; + + /* Mask to specify which instruction scheduling options should be used. */ + unsigned long aarch64_tune_flags = 0; + ++/* Support for command line parsing of boolean flags in the tuning ++ structures. */ ++struct aarch64_flag_desc ++{ ++ const char* name; ++ unsigned int flag; ++}; ++ ++#define AARCH64_FUSION_PAIR(name, internal_name, y) \ ++ { name, AARCH64_FUSE_##internal_name }, ++static const struct aarch64_flag_desc aarch64_fusible_pairs[] = ++{ ++ { "none", AARCH64_FUSE_NOTHING }, ++#include "aarch64-fusion-pairs.def" ++ { "all", AARCH64_FUSE_ALL }, ++ { NULL, AARCH64_FUSE_NOTHING } ++}; ++#undef AARCH64_FUION_PAIR ++ ++#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \ ++ { name, AARCH64_EXTRA_TUNE_##internal_name }, ++static const struct aarch64_flag_desc aarch64_tuning_flags[] = ++{ ++ { "none", AARCH64_EXTRA_TUNE_NONE }, ++#include "aarch64-tuning-flags.def" ++ { "all", AARCH64_EXTRA_TUNE_ALL }, ++ { NULL, AARCH64_EXTRA_TUNE_NONE } ++}; ++#undef AARCH64_EXTRA_TUNING_OPTION ++ + /* Tuning parameters. 
*/ + + static const struct cpu_addrcost_table generic_addrcost_table = +@@ -332,12 +360,12 @@ static const struct cpu_vector_cost xgene1_vector_cost = + 1 /* cond_not_taken_branch_cost */ + }; +-#define AARCH64_FUSE_NOTHING (0) +-#define AARCH64_FUSE_MOV_MOVK (1 << 0) +-#define AARCH64_FUSE_ADRP_ADD (1 << 1) +-#define AARCH64_FUSE_MOVK_MOVK (1 << 2) +-#define AARCH64_FUSE_ADRP_LDR (1 << 3) +-#define AARCH64_FUSE_CMP_BRANCH (1 << 4) +/* Generic costs for branch instructions. */ +static const struct cpu_branch_cost generic_branch_cost = +{ + 2, /* Predictable. */ + 2 /* Unpredictable. */ +}; -+ + static const struct tune_params generic_tunings = { - &cortexa57_extra_costs, +@@ -345,15 +373,19 @@ static const struct tune_params generic_tunings = &generic_addrcost_table, &generic_regmove_cost, &generic_vector_cost, + &generic_branch_cost, 4, /* memmov_cost */ 2, /* issue_rate */ - AARCH64_FUSE_NOTHING, /* fuseable_ops */ -@@ -353,7 +361,9 @@ static const struct tune_params generic_tunings = +- AARCH64_FUSE_NOTHING, /* fuseable_ops */ ++ AARCH64_FUSE_NOTHING, /* fusible_ops */ + 8, /* function_align. */ + 8, /* jump_align. */ 4, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params cortexa53_tunings = -@@ -362,6 +372,7 @@ static const struct tune_params cortexa53_tunings = +@@ -362,16 +394,20 @@ static const struct tune_params cortexa53_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, @@ -657,75 +1434,129 @@ 4, /* memmov_cost */ 2, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD -@@ -371,7 +382,9 @@ static const struct tune_params cortexa53_tunings = +- | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops */ ++ | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ + 8, /* function_align. */ + 8, /* jump_align. */ 4, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params cortexa57_tunings = -@@ -380,6 +393,7 @@ static const struct tune_params cortexa57_tunings = +@@ -380,16 +416,42 @@ static const struct tune_params cortexa57_tunings = &cortexa57_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, + &generic_branch_cost, ++ 4, /* memmov_cost */ ++ 3, /* issue_rate */ ++ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD ++ | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ ++ 16, /* function_align. */ ++ 8, /* jump_align. */ ++ 4, /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. 
*/ ++}; ++ ++static const struct tune_params cortexa72_tunings = ++{ ++ &cortexa57_extra_costs, ++ &cortexa57_addrcost_table, ++ &cortexa57_regmove_cost, ++ &cortexa57_vector_cost, ++ &generic_branch_cost, 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD -@@ -389,7 +403,9 @@ static const struct tune_params cortexa57_tunings = +- | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */ ++ | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ + 16, /* function_align. */ + 8, /* jump_align. */ 4, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params thunderx_tunings = -@@ -398,6 +414,7 @@ static const struct tune_params thunderx_tunings = +@@ -398,15 +460,19 @@ static const struct tune_params thunderx_tunings = &generic_addrcost_table, &thunderx_regmove_cost, &generic_vector_cost, + &generic_branch_cost, 6, /* memmov_cost */ 2, /* issue_rate */ - AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */ -@@ -406,7 +423,9 @@ static const struct tune_params thunderx_tunings = +- AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */ ++ AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ + 8, /* function_align. */ + 8, /* jump_align. */ 8, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params xgene1_tunings = -@@ -415,6 +434,7 @@ static const struct tune_params xgene1_tunings = +@@ -415,15 +481,37 @@ static const struct tune_params xgene1_tunings = &xgene1_addrcost_table, &xgene1_regmove_cost, &xgene1_vector_cost, + &generic_branch_cost, 6, /* memmov_cost */ 4, /* issue_rate */ - AARCH64_FUSE_NOTHING, /* fuseable_ops */ -@@ -423,7 +443,9 @@ static const struct tune_params xgene1_tunings = +- AARCH64_FUSE_NOTHING, /* fuseable_ops */ ++ AARCH64_FUSE_NOTHING, /* fusible_ops */ + 16, /* function_align. */ + 8, /* jump_align. */ 16, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ ++}; ++ ++/* Support for fine-grained override of the tuning structures. */ ++struct aarch64_tuning_override_function ++{ ++ const char* name; ++ void (*parse_override)(const char*, struct tune_params*); ++}; ++ ++static void aarch64_parse_fuse_string (const char*, struct tune_params*); ++static void aarch64_parse_tune_string (const char*, struct tune_params*); ++ ++static const struct aarch64_tuning_override_function ++aarch64_tuning_override_functions[] = ++{ ++ { "fuse", aarch64_parse_fuse_string }, ++ { "tune", aarch64_parse_tune_string }, ++ { NULL, NULL } }; /* A processor implementing AArch64. */ -@@ -440,7 +462,7 @@ struct processor +@@ -440,7 +528,7 @@ struct processor /* Processor cores implementing AArch64. 
*/ static const struct processor all_cores[] = { @@ -734,7 +1565,17 @@ {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings}, #include "aarch64-cores.def" #undef AARCH64_CORE -@@ -477,7 +499,7 @@ struct aarch64_option_extension +@@ -464,6 +552,9 @@ static const struct processor *selected_arch; + static const struct processor *selected_cpu; + static const struct processor *selected_tune; + ++/* The current tuning set. */ ++struct tune_params aarch64_tune_params = generic_tunings; ++ + #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) + + /* An ISA extension in the co-processor and main instruction set space. */ +@@ -477,7 +568,7 @@ struct aarch64_option_extension /* ISA extensions in AArch64. */ static const struct aarch64_option_extension all_extensions[] = { @@ -743,21 +1584,75 @@ {NAME, FLAGS_ON, FLAGS_OFF}, #include "aarch64-option-extensions.def" #undef AARCH64_OPT_EXTENSION -@@ -512,9 +534,11 @@ static const char * const aarch64_condition_codes[] = +@@ -511,10 +602,22 @@ static const char * const aarch64_condition_codes[] = + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" }; ++void ++aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg) ++{ ++ const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector"; ++ if (TARGET_GENERAL_REGS_ONLY) ++ error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg); ++ else ++ error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg); ++} ++ static unsigned int -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED) +aarch64_min_divisions_for_recip_mul (enum machine_mode mode) { - return 2; + if (GET_MODE_UNIT_SIZE (mode) == 4) -+ return aarch64_tune_params->min_div_recip_mul_sf; -+ return aarch64_tune_params->min_div_recip_mul_df; ++ return aarch64_tune_params.min_div_recip_mul_sf; ++ return aarch64_tune_params.min_div_recip_mul_df; } static int -@@ -4901,8 +4925,9 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) +@@ -522,11 +625,11 @@ aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED, + enum machine_mode mode) + { + if (VECTOR_MODE_P (mode)) +- return aarch64_tune_params->vec_reassoc_width; ++ return aarch64_tune_params.vec_reassoc_width; + if (INTEGRAL_MODE_P (mode)) +- return aarch64_tune_params->int_reassoc_width; ++ return aarch64_tune_params.int_reassoc_width; + if (FLOAT_MODE_P (mode)) +- return aarch64_tune_params->fp_reassoc_width; ++ return aarch64_tune_params.fp_reassoc_width; + return 1; + } + +@@ -1763,6 +1866,9 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, + and homogenous short-vector aggregates (HVA). */ + if (allocate_nvrn) + { ++ if (!TARGET_FLOAT) ++ aarch64_err_no_fpadvsimd (mode, "argument"); ++ + if (nvrn + nregs <= NUM_FP_ARG_REGS) + { + pcum->aapcs_nextnvrn = nvrn + nregs; +@@ -1889,6 +1995,17 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + pcum->aapcs_stack_words = 0; + pcum->aapcs_stack_size = 0; + ++ if (!TARGET_FLOAT ++ && fndecl && TREE_PUBLIC (fndecl) ++ && fntype && fntype != error_mark_node) ++ { ++ const_tree type = TREE_TYPE (fntype); ++ machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */ ++ int nregs ATTRIBUTE_UNUSED; /* Likewise. 
*/ ++ if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type, ++ &mode, &nregs, NULL)) ++ aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type"); ++ } + return; + } + +@@ -4901,8 +5018,9 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) case FP_REGS: case FP_LO_REGS: return @@ -769,7 +1664,7 @@ case STACK_REG: return 1; -@@ -5157,9 +5182,18 @@ aarch64_strip_extend (rtx x) +@@ -5157,9 +5275,18 @@ aarch64_strip_extend (rtx x) return x; } @@ -790,16 +1685,19 @@ operands where needed. */ static int -@@ -5169,7 +5203,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5167,9 +5294,9 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) + { + rtx op0, op1; const struct cpu_cost_table *extra_cost - = aarch64_tune_params->insn_extra_cost; +- = aarch64_tune_params->insn_extra_cost; ++ = aarch64_tune_params.insn_extra_cost; int cost = 0; - bool maybe_fma = (outer == PLUS || outer == MINUS); + bool compound_p = (outer == PLUS || outer == MINUS); machine_mode mode = GET_MODE (x); gcc_checking_assert (code == MULT); -@@ -5184,24 +5218,50 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5184,24 +5311,50 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) if (GET_MODE_CLASS (mode) == MODE_INT) { /* The multiply will be canonicalized as a shift, cost it as such. */ @@ -856,7 +1754,7 @@ /* Integer multiplies or FMAs have zero/sign extending variants. */ if ((GET_CODE (op0) == ZERO_EXTEND && GET_CODE (op1) == ZERO_EXTEND) -@@ -5213,8 +5273,8 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5213,8 +5366,8 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) if (speed) { @@ -867,7 +1765,7 @@ cost += extra_cost->mult[0].extend_add; else /* MUL/SMULL/UMULL. */ -@@ -5224,15 +5284,15 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5224,15 +5377,15 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) return cost; } @@ -886,7 +1784,7 @@ cost += extra_cost->mult[mode == DImode].add; else /* MUL. */ -@@ -5250,7 +5310,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5250,7 +5403,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) which case FNMUL is different than FMUL with operand negation. */ bool neg0 = GET_CODE (op0) == NEG; bool neg1 = GET_CODE (op1) == NEG; @@ -895,7 +1793,7 @@ { if (neg0) op0 = XEXP (op0, 0); -@@ -5258,7 +5318,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5258,7 +5411,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) op1 = XEXP (op1, 0); } @@ -904,7 +1802,16 @@ /* FMADD/FNMADD/FNMSUB/FMSUB. */ cost += extra_cost->fp[mode == DFmode].fma; else -@@ -5367,6 +5427,23 @@ aarch64_address_cost (rtx x, +@@ -5279,7 +5432,7 @@ aarch64_address_cost (rtx x, + bool speed) + { + enum rtx_code c = GET_CODE (x); +- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; ++ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost; + struct aarch64_address_info info; + int cost = 0; + info.shift = 0; +@@ -5367,6 +5520,23 @@ aarch64_address_cost (rtx x, return cost; } @@ -917,7 +1824,7 @@ +{ + /* When optimizing for speed, use the cost of unpredictable branches. 
*/ + const struct cpu_branch_cost *branch_costs = -+ aarch64_tune_params->branch_costs; ++ aarch64_tune_params.branch_costs; + + if (!speed_p || predictable_p) + return branch_costs->predictable; @@ -928,7 +1835,7 @@ /* Return true if the RTX X in mode MODE is a zero or sign extract usable in an ADD or SUB (extended register) instruction. */ static bool -@@ -5415,6 +5492,51 @@ aarch64_frint_unspec_p (unsigned int u) +@@ -5415,6 +5585,51 @@ aarch64_frint_unspec_p (unsigned int u) } } @@ -980,7 +1887,16 @@ /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)), storing it in *COST. Result is true if the total cost of the operation has now been calculated. */ -@@ -5505,16 +5627,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5497,7 +5712,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, + { + rtx op0, op1, op2; + const struct cpu_cost_table *extra_cost +- = aarch64_tune_params->insn_extra_cost; ++ = aarch64_tune_params.insn_extra_cost; + machine_mode mode = GET_MODE (x); + + /* By default, assume that everything has equivalent cost to the +@@ -5505,16 +5720,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, above this default. */ *cost = COSTS_N_INSNS (1); @@ -997,7 +1913,7 @@ switch (code) { case SET: -@@ -5529,7 +5641,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5529,7 +5734,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (speed) { rtx address = XEXP (op0, 0); @@ -1008,7 +1924,7 @@ *cost += extra_cost->ldst.store; else if (mode == SFmode) *cost += extra_cost->ldst.storef; -@@ -5550,15 +5664,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5550,15 +5757,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, /* Fall through. */ case REG: @@ -1037,7 +1953,7 @@ else /* Cost is just the cost of the RHS of the set. */ *cost += rtx_cost (op1, SET, 1, speed); -@@ -5656,7 +5777,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5656,7 +5870,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, approximation for the additional cost of the addressing mode. */ rtx address = XEXP (x, 0); @@ -1048,7 +1964,7 @@ *cost += extra_cost->ldst.load; else if (mode == SFmode) *cost += extra_cost->ldst.loadf; -@@ -5673,6 +5796,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5673,6 +5889,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case NEG: op0 = XEXP (x, 0); @@ -1065,7 +1981,7 @@ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) { if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -@@ -5717,7 +5850,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5717,7 +5943,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case CLRSB: case CLZ: if (speed) @@ -1079,7 +1995,7 @@ return false; -@@ -5796,12 +5934,27 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5796,12 +6027,27 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) { @@ -1107,7 +2023,7 @@ return false; case MINUS: -@@ -5810,6 +5963,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5810,6 +6056,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, op1 = XEXP (x, 1); cost_minus: @@ -1116,7 +2032,7 @@ /* Detect valid immediates. 
*/ if ((GET_MODE_CLASS (mode) == MODE_INT || (GET_MODE_CLASS (mode) == MODE_CC -@@ -5817,20 +5972,17 @@ cost_minus: +@@ -5817,20 +6065,17 @@ cost_minus: && CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) { @@ -1138,7 +2054,7 @@ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), (enum rtx_code) GET_CODE (op1), -@@ -5842,13 +5994,12 @@ cost_minus: +@@ -5842,13 +6087,12 @@ cost_minus: /* Cost this as an FMA-alike operation. */ if ((GET_CODE (new_op1) == MULT @@ -1153,7 +2069,7 @@ return true; } -@@ -5856,12 +6007,21 @@ cost_minus: +@@ -5856,12 +6100,21 @@ cost_minus: if (speed) { @@ -1180,7 +2096,7 @@ } return true; } -@@ -5895,11 +6055,13 @@ cost_plus: +@@ -5895,11 +6148,13 @@ cost_plus: return true; } @@ -1195,7 +2111,7 @@ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), (enum rtx_code) GET_CODE (op0), -@@ -5912,25 +6074,32 @@ cost_plus: +@@ -5912,25 +6167,32 @@ cost_plus: new_op0 = aarch64_strip_extend (op0); if (GET_CODE (new_op0) == MULT @@ -1237,7 +2153,7 @@ } return true; } -@@ -5939,8 +6108,12 @@ cost_plus: +@@ -5939,8 +6201,12 @@ cost_plus: *cost = COSTS_N_INSNS (1); if (speed) @@ -1252,7 +2168,7 @@ return false; case IOR: -@@ -5948,8 +6121,22 @@ cost_plus: +@@ -5948,8 +6214,22 @@ cost_plus: { *cost = COSTS_N_INSNS (1); @@ -1276,7 +2192,7 @@ return true; } -@@ -5960,6 +6147,13 @@ cost_plus: +@@ -5960,6 +6240,13 @@ cost_plus: op0 = XEXP (x, 0); op1 = XEXP (x, 1); @@ -1290,7 +2206,7 @@ if (code == AND && GET_CODE (op0) == MULT && CONST_INT_P (XEXP (op0, 1)) -@@ -6025,13 +6219,52 @@ cost_plus: +@@ -6025,13 +6312,52 @@ cost_plus: return false; case NOT: @@ -1346,7 +2262,7 @@ return false; case ZERO_EXTEND: -@@ -6067,10 +6300,19 @@ cost_plus: +@@ -6067,10 +6393,19 @@ cost_plus: return true; } @@ -1369,7 +2285,7 @@ return false; case SIGN_EXTEND: -@@ -6090,7 +6332,12 @@ cost_plus: +@@ -6090,7 +6425,12 @@ cost_plus: } if (speed) @@ -1383,7 +2299,7 @@ return false; case ASHIFT: -@@ -6099,10 +6346,20 @@ cost_plus: +@@ -6099,10 +6439,20 @@ cost_plus: if (CONST_INT_P (op1)) { @@ -1407,7 +2323,7 @@ /* We can incorporate zero/sign extend for free. */ if (GET_CODE (op0) == ZERO_EXTEND -@@ -6114,10 +6371,19 @@ cost_plus: +@@ -6114,10 +6464,19 @@ cost_plus: } else { @@ -1430,7 +2346,7 @@ return false; /* All arguments need to be in registers. */ } -@@ -6132,7 +6398,12 @@ cost_plus: +@@ -6132,7 +6491,12 @@ cost_plus: { /* ASR (immediate) and friends. */ if (speed) @@ -1444,7 +2360,7 @@ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); return true; -@@ -6142,8 +6413,12 @@ cost_plus: +@@ -6142,8 +6506,12 @@ cost_plus: /* ASR (register) and friends. */ if (speed) @@ -1459,7 +2375,7 @@ return false; /* All arguments need to be in registers. */ } -@@ -6191,7 +6466,12 @@ cost_plus: +@@ -6191,7 +6559,12 @@ cost_plus: case SIGN_EXTRACT: /* UBFX/SBFX. */ if (speed) @@ -1473,7 +2389,7 @@ /* We can trust that the immediates used will be correct (there are no by-register forms), so we need only cost op0. */ -@@ -6208,7 +6488,9 @@ cost_plus: +@@ -6208,7 +6581,9 @@ cost_plus: case UMOD: if (speed) { @@ -1484,7 +2400,7 @@ *cost += (extra_cost->mult[GET_MODE (x) == DImode].add + extra_cost->mult[GET_MODE (x) == DImode].idiv); else if (GET_MODE (x) == DFmode) -@@ -6225,7 +6507,9 @@ cost_plus: +@@ -6225,7 +6600,9 @@ cost_plus: case SQRT: if (speed) { @@ -1495,7 +2411,7 @@ /* There is no integer SQRT, so only DIV and UDIV can get here. 
*/ *cost += extra_cost->mult[mode == DImode].idiv; -@@ -6257,7 +6541,12 @@ cost_plus: +@@ -6257,7 +6634,12 @@ cost_plus: op2 = XEXP (x, 2); if (speed) @@ -1509,7 +2425,7 @@ /* FMSUB, FNMADD, and FNMSUB are free. */ if (GET_CODE (op0) == NEG) -@@ -6295,14 +6584,36 @@ cost_plus: +@@ -6295,14 +6677,36 @@ cost_plus: *cost += rtx_cost (op2, FMA, 2, speed); return true; @@ -1548,7 +2464,7 @@ return false; case FIX: -@@ -6323,15 +6634,37 @@ cost_plus: +@@ -6323,15 +6727,37 @@ cost_plus: } if (speed) @@ -1566,14 +2482,14 @@ case ABS: - if (GET_MODE_CLASS (mode) == MODE_FLOAT) + if (VECTOR_MODE_P (mode)) - { -- /* FABS and FNEG are analogous. */ ++ { + /* ABS (vector). */ + if (speed) + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ { + { +- /* FABS and FNEG are analogous. */ + op0 = XEXP (x, 0); + + /* FABD, which is analogous to FADD. */ @@ -1590,7 +2506,7 @@ if (speed) *cost += extra_cost->fp[mode == DFmode].neg; } -@@ -6350,10 +6683,15 @@ cost_plus: +@@ -6350,10 +6776,15 @@ cost_plus: case SMIN: if (speed) { @@ -1610,44 +2526,406 @@ } return false; -@@ -7830,6 +8168,26 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) - return -1; +@@ -6447,7 +6878,7 @@ aarch64_register_move_cost (machine_mode mode, + enum reg_class from = (enum reg_class) from_i; + enum reg_class to = (enum reg_class) to_i; + const struct cpu_regmove_cost *regmove_cost +- = aarch64_tune_params->regmove_cost; ++ = aarch64_tune_params.regmove_cost; + + /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ + if (to == CALLER_SAVE_REGS || to == POINTER_REGS) +@@ -6502,14 +6933,14 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) + { +- return aarch64_tune_params->memmov_cost; ++ return aarch64_tune_params.memmov_cost; } -+/* Return TRUE if the type, as described by TYPE and MODE, is a short vector -+ type as described in AAPCS64 \S 4.1.2. + /* Return the number of instructions that can be issued per cycle. 
*/ + static int + aarch64_sched_issue_rate (void) + { +- return aarch64_tune_params->issue_rate; ++ return aarch64_tune_params.issue_rate; + } + + static int +@@ -6533,44 +6964,44 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + switch (type_of_cost) + { + case scalar_stmt: +- return aarch64_tune_params->vec_costs->scalar_stmt_cost; ++ return aarch64_tune_params.vec_costs->scalar_stmt_cost; + + case scalar_load: +- return aarch64_tune_params->vec_costs->scalar_load_cost; ++ return aarch64_tune_params.vec_costs->scalar_load_cost; + + case scalar_store: +- return aarch64_tune_params->vec_costs->scalar_store_cost; ++ return aarch64_tune_params.vec_costs->scalar_store_cost; + + case vector_stmt: +- return aarch64_tune_params->vec_costs->vec_stmt_cost; ++ return aarch64_tune_params.vec_costs->vec_stmt_cost; + + case vector_load: +- return aarch64_tune_params->vec_costs->vec_align_load_cost; ++ return aarch64_tune_params.vec_costs->vec_align_load_cost; + + case vector_store: +- return aarch64_tune_params->vec_costs->vec_store_cost; ++ return aarch64_tune_params.vec_costs->vec_store_cost; + + case vec_to_scalar: +- return aarch64_tune_params->vec_costs->vec_to_scalar_cost; ++ return aarch64_tune_params.vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: +- return aarch64_tune_params->vec_costs->scalar_to_vec_cost; ++ return aarch64_tune_params.vec_costs->scalar_to_vec_cost; + + case unaligned_load: +- return aarch64_tune_params->vec_costs->vec_unalign_load_cost; ++ return aarch64_tune_params.vec_costs->vec_unalign_load_cost; + + case unaligned_store: +- return aarch64_tune_params->vec_costs->vec_unalign_store_cost; ++ return aarch64_tune_params.vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: +- return aarch64_tune_params->vec_costs->cond_taken_branch_cost; ++ return aarch64_tune_params.vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: +- return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost; ++ return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: +- return aarch64_tune_params->vec_costs->vec_stmt_cost; ++ return aarch64_tune_params.vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); +@@ -6813,9 +7244,181 @@ aarch64_parse_tune (void) + return; + } + ++/* Parse TOKEN, which has length LENGTH to see if it is an option ++ described in FLAG. If it is, return the index bit for that fusion type. ++ If not, error (printing OPTION_NAME) and return zero. */ ++ ++static unsigned int ++aarch64_parse_one_option_token (const char *token, ++ size_t length, ++ const struct aarch64_flag_desc *flag, ++ const char *option_name) ++{ ++ for (; flag->name != NULL; flag++) ++ { ++ if (length == strlen (flag->name) ++ && !strncmp (flag->name, token, length)) ++ return flag->flag; ++ } + -+ See the comment above aarch64_composite_type_p for the notes on MODE. */ ++ error ("unknown flag passed in -moverride=%s (%s)", option_name, token); ++ return 0; ++} + -+static bool -+aarch64_short_vector_p (const_tree type, -+ machine_mode mode) ++/* Parse OPTION which is a comma-separated list of flags to enable. ++ FLAGS gives the list of flags we understand, INITIAL_STATE gives any ++ default state we inherit from the CPU tuning structures. OPTION_NAME ++ gives the top-level option we are parsing in the -moverride string, ++ for use in error messages. 
*/ ++ ++static unsigned int ++aarch64_parse_boolean_options (const char *option, ++ const struct aarch64_flag_desc *flags, ++ unsigned int initial_state, ++ const char *option_name) ++{ ++ const char separator = '.'; ++ const char* specs = option; ++ const char* ntoken = option; ++ unsigned int found_flags = initial_state; ++ ++ while ((ntoken = strchr (specs, separator))) ++ { ++ size_t token_length = ntoken - specs; ++ unsigned token_ops = aarch64_parse_one_option_token (specs, ++ token_length, ++ flags, ++ option_name); ++ /* If we find "none" (or, for simplicity's sake, an error) anywhere ++ in the token stream, reset the supported operations. So: ++ ++ adrp+add.cmp+branch.none.adrp+add ++ ++ would have the result of turning on only adrp+add fusion. */ ++ if (!token_ops) ++ found_flags = 0; ++ ++ found_flags |= token_ops; ++ specs = ++ntoken; ++ } ++ ++ /* We ended with a comma, print something. */ ++ if (!(*specs)) ++ { ++ error ("%s string ill-formed\n", option_name); ++ return 0; ++ } ++ ++ /* We still have one more token to parse. */ ++ size_t token_length = strlen (specs); ++ unsigned token_ops = aarch64_parse_one_option_token (specs, ++ token_length, ++ flags, ++ option_name); ++ if (!token_ops) ++ found_flags = 0; ++ ++ found_flags |= token_ops; ++ return found_flags; ++} ++ ++/* Support for overriding instruction fusion. */ ++ ++static void ++aarch64_parse_fuse_string (const char *fuse_string, ++ struct tune_params *tune) +{ -+ HOST_WIDE_INT size = -1; ++ tune->fusible_ops = aarch64_parse_boolean_options (fuse_string, ++ aarch64_fusible_pairs, ++ tune->fusible_ops, ++ "fuse="); ++} + -+ if (type && TREE_CODE (type) == VECTOR_TYPE) -+ size = int_size_in_bytes (type); -+ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT -+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) -+ size = GET_MODE_SIZE (mode); ++/* Support for overriding other tuning flags. */ + -+ return (size == 8 || size == 16); ++static void ++aarch64_parse_tune_string (const char *tune_string, ++ struct tune_params *tune) ++{ ++ tune->extra_tuning_flags ++ = aarch64_parse_boolean_options (tune_string, ++ aarch64_tuning_flags, ++ tune->extra_tuning_flags, ++ "tune="); +} + - /* Return TRUE if the type, as described by TYPE and MODE, is a composite - type as described in AAPCS64 \S 4.3. This includes aggregate, union and - array types. The C99 floating-point complex types are also considered -@@ -7851,6 +8209,9 @@ static bool - aarch64_composite_type_p (const_tree type, - machine_mode mode) - { -+ if (aarch64_short_vector_p (type, mode)) -+ return false; ++/* Parse TOKEN, which has length LENGTH to see if it is a tuning option ++ we understand. If it is, extract the option string and handoff to ++ the appropriate function. */ + - if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)) ++void ++aarch64_parse_one_override_token (const char* token, ++ size_t length, ++ struct tune_params *tune) ++{ ++ const struct aarch64_tuning_override_function *fn ++ = aarch64_tuning_override_functions; ++ ++ const char *option_part = strchr (token, '='); ++ if (!option_part) ++ { ++ error ("tuning string missing in option (%s)", token); ++ return; ++ } ++ ++ /* Get the length of the option name. */ ++ length = option_part - token; ++ /* Skip the '=' to get to the option string. 
*/ ++ option_part++; ++ ++ for (; fn->name != NULL; fn++) ++ { ++ if (!strncmp (fn->name, token, length)) ++ { ++ fn->parse_override (option_part, tune); ++ return; ++ } ++ } ++ ++ error ("unknown tuning option (%s)",token); ++ return; ++} ++ ++/* Parse STRING looking for options in the format: ++ string :: option:string ++ option :: name=substring ++ name :: {a-z} ++ substring :: defined by option. */ ++ ++static void ++aarch64_parse_override_string (const char* input_string, ++ struct tune_params* tune) ++{ ++ const char separator = ':'; ++ size_t string_length = strlen (input_string) + 1; ++ char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length); ++ char *string = string_root; ++ strncpy (string, input_string, string_length); ++ string[string_length - 1] = '\0'; ++ ++ char* ntoken = string; ++ ++ while ((ntoken = strchr (string, separator))) ++ { ++ size_t token_length = ntoken - string; ++ /* Make this substring look like a string. */ ++ *ntoken = '\0'; ++ aarch64_parse_one_override_token (string, token_length, tune); ++ string = ++ntoken; ++ } ++ ++ /* One last option to parse. */ ++ aarch64_parse_one_override_token (string, strlen (string), tune); ++ free (string_root); ++} ++ ++/* Implement TARGET_OPTION_OVERRIDE. */ + +-/* Implement TARGET_OPTION_OVERRIDE. */ +- + static void + aarch64_override_options (void) + { +@@ -6872,9 +7475,15 @@ aarch64_override_options (void) + + aarch64_tune_flags = selected_tune->flags; + aarch64_tune = selected_tune->core; +- aarch64_tune_params = selected_tune->tune; ++ /* Make a copy of the tuning parameters attached to the core, which ++ we may later overwrite. */ ++ aarch64_tune_params = *(selected_tune->tune); + aarch64_architecture_version = selected_cpu->architecture_version; + ++ if (aarch64_override_tune_string) ++ aarch64_parse_override_string (aarch64_override_tune_string, ++ &aarch64_tune_params); ++ + if (aarch64_fix_a53_err835769 == 2) + { + #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +@@ -6884,6 +7493,8 @@ aarch64_override_options (void) + #endif + } + ++ aarch64_register_fma_steering (); ++ + aarch64_override_options_after_change (); + } + +@@ -6902,11 +7513,11 @@ aarch64_override_options_after_change (void) + if (!optimize_size) + { + if (align_loops <= 0) +- align_loops = aarch64_tune_params->loop_align; ++ align_loops = aarch64_tune_params.loop_align; + if (align_jumps <= 0) +- align_jumps = aarch64_tune_params->jump_align; ++ align_jumps = aarch64_tune_params.jump_align; + if (align_functions <= 0) +- align_functions = aarch64_tune_params->function_align; ++ align_functions = aarch64_tune_params.function_align; + } + } + +@@ -7104,16 +7715,13 @@ aarch64_valid_floating_const (machine_mode mode, rtx x) + if (!CONST_DOUBLE_P (x)) + return false; + +- /* TODO: We could handle moving 0.0 to a TFmode register, +- but first we would like to refactor the movtf_aarch64 +- to be more amicable to split moves properly and +- correctly gate on TARGET_SIMD. For now - reject all +- constants which are not to SFmode or DFmode registers. */ ++ if (aarch64_float_const_zero_rtx_p (x)) ++ return true; ++ ++ /* We only handle moving 0.0 to a TFmode register. 
*/ + if (!(mode == SFmode || mode == DFmode)) + return false; + +- if (aarch64_float_const_zero_rtx_p (x)) +- return true; + return aarch64_float_const_representable_p (x); + } + +@@ -7247,9 +7855,7 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) + + if (!TARGET_FLOAT) + { +- if (cum->aapcs_nvrn > 0) +- sorry ("%qs and floating point or vector arguments", +- "-mgeneral-regs-only"); ++ gcc_assert (cum->aapcs_nvrn == 0); + vr_save_area_size = 0; + } + +@@ -7356,8 +7962,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + { + /* TYPE passed in fp/simd registers. */ + if (!TARGET_FLOAT) +- sorry ("%qs and floating point or vector arguments", +- "-mgeneral-regs-only"); ++ aarch64_err_no_fpadvsimd (mode, "varargs"); + + f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), + unshare_expr (valist), f_vrtop, NULL_TREE); +@@ -7594,9 +8199,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, + + if (!TARGET_FLOAT) + { +- if (local_cum.aapcs_nvrn > 0) +- sorry ("%qs and floating point or vector arguments", +- "-mgeneral-regs-only"); ++ gcc_assert (local_cum.aapcs_nvrn == 0); + vr_saved = 0; + } + +@@ -7830,6 +8433,26 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) + return -1; + } + ++/* Return TRUE if the type, as described by TYPE and MODE, is a short vector ++ type as described in AAPCS64 \S 4.1.2. ++ ++ See the comment above aarch64_composite_type_p for the notes on MODE. */ ++ ++static bool ++aarch64_short_vector_p (const_tree type, ++ machine_mode mode) ++{ ++ HOST_WIDE_INT size = -1; ++ ++ if (type && TREE_CODE (type) == VECTOR_TYPE) ++ size = int_size_in_bytes (type); ++ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) ++ size = GET_MODE_SIZE (mode); ++ ++ return (size == 8 || size == 16); ++} ++ + /* Return TRUE if the type, as described by TYPE and MODE, is a composite + type as described in AAPCS64 \S 4.3. This includes aggregate, union and + array types. The C99 floating-point complex types are also considered +@@ -7851,6 +8474,9 @@ static bool + aarch64_composite_type_p (const_tree type, + machine_mode mode) + { ++ if (aarch64_short_vector_p (type, mode)) ++ return false; ++ + if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)) return true; -@@ -7862,27 +8223,6 @@ aarch64_composite_type_p (const_tree type, +@@ -7862,27 +8488,6 @@ aarch64_composite_type_p (const_tree type, return false; } @@ -1675,7 +2953,15 @@ /* Return TRUE if an argument, whose type is described by TYPE and MODE, shall be passed or returned in simd/fp register(s) (providing these parameter passing registers are available). -@@ -8581,24 +8921,6 @@ aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, +@@ -8575,30 +9180,12 @@ aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, + if (lane < low || lane >= high) + { + if (exp) +- error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1); ++ error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1); + else +- error ("lane %ld out of range %ld - %ld", lane, low, high - 1); ++ error ("lane %wd out of range %wd - %wd", lane, low, high - 1); } } @@ -1700,7 +2986,7 @@ /* Return TRUE if OP is a valid vector addressing mode. 
*/ bool aarch64_simd_mem_operand_p (rtx op) -@@ -8781,22 +9103,19 @@ aarch64_expand_vector_init (rtx target, rtx vals) +@@ -8781,22 +9368,19 @@ aarch64_expand_vector_init (rtx target, rtx vals) machine_mode mode = GET_MODE (target); machine_mode inner_mode = GET_MODE_INNER (mode); int n_elts = GET_MODE_NUNITS (mode); @@ -1731,7 +3017,7 @@ all_same = false; } -@@ -8813,36 +9132,60 @@ aarch64_expand_vector_init (rtx target, rtx vals) +@@ -8813,36 +9397,60 @@ aarch64_expand_vector_init (rtx target, rtx vals) /* Splat a single non-constant element if we can. */ if (all_same) { @@ -1807,1411 +3093,9556 @@ emit_move_insn (adjust_address_nv (mem, inner_mode, i * GET_MODE_SIZE (inner_mode)), XVECEXP (vals, 0, i)); ---- a/src/gcc/config/aarch64/aarch64.h -+++ b/src/gcc/config/aarch64/aarch64.h -@@ -506,7 +506,7 @@ enum reg_class - - enum target_cpus +@@ -9015,7 +9623,23 @@ aarch64_expand_compare_and_swap (rtx operands[]) { --#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \ -+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ - TARGET_CPU_##INTERNAL_IDENT, - #include "aarch64-cores.def" - #undef AARCH64_CORE -@@ -823,7 +823,8 @@ do { \ - #define TRAMPOLINE_SECTION text_section + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + machine_mode mode, cmp_mode; +- rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ int idx; ++ gen_cas_fn gen; ++ const gen_cas_fn split_cas[] = ++ { ++ gen_aarch64_compare_and_swapqi, ++ gen_aarch64_compare_and_swaphi, ++ gen_aarch64_compare_and_swapsi, ++ gen_aarch64_compare_and_swapdi ++ }; ++ const gen_cas_fn atomic_cas[] = ++ { ++ gen_aarch64_compare_and_swapqi_lse, ++ gen_aarch64_compare_and_swaphi_lse, ++ gen_aarch64_compare_and_swapsi_lse, ++ gen_aarch64_compare_and_swapdi_lse ++ }; + + bval = operands[0]; + rval = operands[1]; +@@ -9060,13 +9684,17 @@ aarch64_expand_compare_and_swap (rtx operands[]) - /* To start with. */ --#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2 -+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \ -+ (aarch64_branch_cost (SPEED_P, PREDICTABLE_P)) - + switch (mode) + { +- case QImode: gen = gen_atomic_compare_and_swapqi_1; break; +- case HImode: gen = gen_atomic_compare_and_swaphi_1; break; +- case SImode: gen = gen_atomic_compare_and_swapsi_1; break; +- case DImode: gen = gen_atomic_compare_and_swapdi_1; break; ++ case QImode: idx = 0; break; ++ case HImode: idx = 1; break; ++ case SImode: idx = 2; break; ++ case DImode: idx = 3; break; + default: + gcc_unreachable (); + } ++ if (TARGET_LSE) ++ gen = atomic_cas[idx]; ++ else ++ gen = split_cas[idx]; - /* Assembly output. */ -@@ -929,11 +930,24 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); - #define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ - { "rewrite_mcpu", aarch64_rewrite_mcpu }, + emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); -+#if defined(__aarch64__) -+extern const char *host_detect_local_cpu (int argc, const char **argv); -+# define EXTRA_SPEC_FUNCTIONS \ -+ { "local_cpu_detect", host_detect_local_cpu }, \ -+ BIG_LITTLE_CPU_SPEC_FUNCTIONS +@@ -9078,6 +9706,32 @@ aarch64_expand_compare_and_swap (rtx operands[]) + emit_insn (gen_rtx_SET (VOIDmode, bval, x)); + } + ++/* Test whether the target supports using a atomic load-operate instruction. ++ CODE is the operation and AFTER is TRUE if the data in memory after the ++ operation should be returned and FALSE if the data before the operation ++ should be returned. 
Returns FALSE if the operation isn't supported by the ++ architecture. */ + -+# define MCPU_MTUNE_NATIVE_SPECS \ -+ " %{march=native:%mode) : operands[0]); - HOST_WIDE_INT imm = INTVAL (operands[2]); +@@ -9095,6 +9749,42 @@ aarch64_emit_post_barrier (enum memmodel model) + } + } -- if (imm < 0) -- imm = -(-imm & ~0xfff); -+ if (aarch64_move_imm (imm, mode) && can_create_pseudo_p ()) -+ { -+ rtx tmp = gen_reg_rtx (mode); -+ emit_move_insn (tmp, operands[2]); -+ operands[2] = tmp; -+ } - else -- imm &= ~0xfff; -- -- emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); -- operands[1] = subtarget; -- operands[2] = GEN_INT (INTVAL (operands[2]) - imm); -+ { -+ rtx subtarget = ((optimize && can_create_pseudo_p ()) -+ ? gen_reg_rtx (mode) : operands[0]); ++/* Emit an atomic compare-and-swap operation. RVAL is the destination register ++ for the data in memory. EXPECTED is the value expected to be in memory. ++ DESIRED is the value to store to memory. MEM is the memory location. MODEL ++ is the memory ordering to use. */ + -+ if (imm < 0) -+ imm = -(-imm & ~0xfff); -+ else -+ imm &= ~0xfff; ++void ++aarch64_gen_atomic_cas (rtx rval, rtx mem, ++ rtx expected, rtx desired, ++ rtx model) ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ machine_mode mode; + -+ emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); -+ operands[1] = subtarget; -+ operands[2] = GEN_INT (INTVAL (operands[2]) - imm); -+ } - } - " - ) -@@ -1529,6 +1539,38 @@ - [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] - ) - -+(define_insn "*adds_shift_imm_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (plus:GPI (ASHIFT:GPI -+ (match_operand:GPI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_" "n")) -+ (match_operand:GPI 3 "register_operand" "r")) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=r") -+ (plus:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2)) -+ (match_dup 3)))] -+ "" -+ "adds\\t%0, %3, %1, %2" -+ [(set_attr "type" "alus_shift_imm")] -+) ++ mode = GET_MODE (mem); + -+(define_insn "*subs_shift_imm_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (minus:GPI (match_operand:GPI 1 "register_operand" "r") -+ (ASHIFT:GPI -+ (match_operand:GPI 2 "register_operand" "r") -+ (match_operand:QI 3 "aarch64_shift_imm_" "n"))) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=r") -+ (minus:GPI (match_dup 1) -+ (ASHIFT:GPI (match_dup 2) (match_dup 3))))] -+ "" -+ "subs\\t%0, %1, %2, %3" -+ [(set_attr "type" "alus_shift_imm")] -+) ++ switch (mode) ++ { ++ case QImode: gen = gen_aarch64_atomic_casqi; break; ++ case HImode: gen = gen_aarch64_atomic_cashi; break; ++ case SImode: gen = gen_aarch64_atomic_cassi; break; ++ case DImode: gen = gen_aarch64_atomic_casdi; break; ++ default: ++ gcc_unreachable (); ++ } + - (define_insn "*adds_mul_imm_" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ -@@ -1589,6 +1631,42 @@ - [(set_attr "type" "alus_ext")] - ) ++ /* Move the expected value into the CAS destination register. */ ++ emit_insn (gen_rtx_SET (VOIDmode, rval, expected)); ++ ++ /* Emit the CAS. */ ++ emit_insn (gen (rval, mem, desired, model)); ++ ++ /* Compare the expected value with the value loaded by the CAS, to establish ++ whether the swap was made. */ ++ aarch64_gen_compare_reg (EQ, rval, expected); ++} ++ + /* Split a compare and swap pattern. 
*/ -+(define_insn "*adds__shift_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (plus:GPI (ashift:GPI -+ (ANY_EXTEND:GPI -+ (match_operand:ALLX 1 "register_operand" "r")) -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand:GPI 3 "register_operand" "r")) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=rk") -+ (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1)) -+ (match_dup 2)) -+ (match_dup 3)))] -+ "" -+ "adds\\t%0, %3, %1, xt %2" -+ [(set_attr "type" "alus_ext")] -+) + void +@@ -9163,11 +9853,257 @@ aarch64_split_compare_and_swap (rtx operands[]) + aarch64_emit_post_barrier (model); + } + ++/* Emit a BIC instruction. */ + -+(define_insn "*subs__shift_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (minus:GPI (match_operand:GPI 1 "register_operand" "r") -+ (ashift:GPI -+ (ANY_EXTEND:GPI -+ (match_operand:ALLX 2 "register_operand" "r")) -+ (match_operand 3 "aarch64_imm3" "Ui3"))) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=rk") -+ (minus:GPI (match_dup 1) -+ (ashift:GPI (ANY_EXTEND:GPI (match_dup 2)) -+ (match_dup 3))))] -+ "" -+ "subs\\t%0, %1, %2, xt %3" -+ [(set_attr "type" "alus_ext")] -+) ++static void ++aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift) ++{ ++ rtx shift_rtx = GEN_INT (shift); ++ rtx (*gen) (rtx, rtx, rtx, rtx); + - (define_insn "*adds__multp2" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ -@@ -1884,6 +1962,38 @@ - [(set_attr "type" "adc_reg")] - ) - -+(define_insn "*add_uxt_shift2" -+ [(set (match_operand:GPI 0 "register_operand" "=rk") -+ (plus:GPI (and:GPI -+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n")) -+ (match_operand:GPI 4 "register_operand" "r")))] -+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL(operands[2]), -+ INTVAL (operands[3]))); -+ return \"add\t%0, %4, %1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++ switch (mode) ++ { ++ case SImode: gen = gen_and_one_cmpl_lshrsi3; break; ++ case DImode: gen = gen_and_one_cmpl_lshrdi3; break; ++ default: ++ gcc_unreachable (); ++ } + -+;; zero_extend version of above -+(define_insn "*add_uxtsi_shift2_uxtw" -+ [(set (match_operand:DI 0 "register_operand" "=rk") -+ (zero_extend:DI -+ (plus:SI (and:SI -+ (ashift:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n")) -+ (match_operand:SI 4 "register_operand" "r"))))] -+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), -+ INTVAL (operands[3]))); -+ return \"add\t%w0, %w4, %w1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++ emit_insn (gen (dst, s2, shift_rtx, s1)); ++} + - (define_insn "*add_uxt_multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (plus:GPI (and:GPI -@@ -2140,6 +2250,38 @@ - [(set_attr "type" "adc_reg")] - ) - -+(define_insn "*sub_uxt_shift2" -+ [(set (match_operand:GPI 0 "register_operand" "=rk") -+ (minus:GPI (match_operand:GPI 4 "register_operand" "rk") -+ (and:GPI -+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n"))))] -+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), -+ INTVAL (operands[3]))); -+ 
return \"sub\t%0, %4, %1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++/* Emit an atomic swap. */ + -+;; zero_extend version of above -+(define_insn "*sub_uxtsi_shift2_uxtw" -+ [(set (match_operand:DI 0 "register_operand" "=rk") -+ (zero_extend:DI -+ (minus:SI (match_operand:SI 4 "register_operand" "rk") -+ (and:SI -+ (ashift:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n")))))] -+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), -+ INTVAL (operands[3]))); -+ return \"sub\t%w0, %w4, %w1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++static void ++aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value, ++ rtx mem, rtx model) ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); + - (define_insn "*sub_uxt_multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (minus:GPI (match_operand:GPI 4 "register_operand" "rk") -@@ -3058,6 +3200,26 @@ - (set_attr "simd" "*,yes")] - ) - -+(define_insn "*_one_cmplsidi3_ze" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (NLOGICAL:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r"))))] -+ "" -+ "\\t%w0, %w2, %w1" -+ [(set_attr "type" "logic_reg")] -+) ++ switch (mode) ++ { ++ case QImode: gen = gen_aarch64_atomic_swpqi; break; ++ case HImode: gen = gen_aarch64_atomic_swphi; break; ++ case SImode: gen = gen_aarch64_atomic_swpsi; break; ++ case DImode: gen = gen_aarch64_atomic_swpdi; break; ++ default: ++ gcc_unreachable (); ++ } + -+(define_insn "*xor_one_cmplsidi3_ze" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")))))] -+ "" -+ "eon\\t%w0, %w1, %w2" -+ [(set_attr "type" "logic_reg")] -+) ++ emit_insn (gen (dst, mem, value, model)); ++} + - ;; (xor (not a) b) is simplify_rtx-ed down to (not (xor a b)). - ;; eon does not operate on SIMD registers so the vector variant must be split. - (define_insn_and_split "*xor_one_cmpl3" -@@ -3131,6 +3293,32 @@ - [(set_attr "type" "logics_shift_imm")] - ) - -+(define_insn "*eor_one_cmpl_3_alt" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (not:GPI (xor:GPI -+ (SHIFT:GPI -+ (match_operand:GPI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_" "n")) -+ (match_operand:GPI 3 "register_operand" "r"))))] -+ "" -+ "eon\\t%0, %3, %1, %2" -+ [(set_attr "type" "logic_shift_imm")] -+) ++/* Operations supported by aarch64_emit_atomic_load_op. */ + -+;; Zero-extend version of the above. -+(define_insn "*eor_one_cmpl_sidi3_alt_ze" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (not:SI (xor:SI -+ (SHIFT:SI -+ (match_operand:SI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) -+ (match_operand:SI 3 "register_operand" "r")))))] -+ "" -+ "eon\\t%w0, %w3, %w1, %2" -+ [(set_attr "type" "logic_shift_imm")] -+) ++enum aarch64_atomic_load_op_code ++{ ++ AARCH64_LDOP_PLUS, /* A + B */ ++ AARCH64_LDOP_XOR, /* A ^ B */ ++ AARCH64_LDOP_OR, /* A | B */ ++ AARCH64_LDOP_BIC /* A & ~B */ ++}; + - (define_insn "*and_one_cmpl_3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ -@@ -3551,6 +3739,21 @@ - [(set_attr "type" "shift_imm")] - ) - -+;; There are no canonicalisation rules for ashift and lshiftrt inside an ior -+;; so we have to match both orderings. 
-+(define_insn "*extr5_insn_alt" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r") -+ (match_operand 4 "const_int_operand" "n")) -+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (match_operand 3 "const_int_operand" "n"))))] -+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (mode) -+ && (UINTVAL (operands[3]) + UINTVAL (operands[4]) -+ == GET_MODE_BITSIZE (mode))" -+ "extr\\t%0, %1, %2, %4" -+ [(set_attr "type" "shift_imm")] -+) ++/* Emit an atomic load-operate. */ + - ;; zero_extend version of the above - (define_insn "*extrsi5_insn_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") -@@ -3565,6 +3768,19 @@ - [(set_attr "type" "shift_imm")] - ) - -+(define_insn "*extrsi5_insn_uxtw_alt" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (ior:SI (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") -+ (match_operand 4 "const_int_operand" "n")) -+ (ashift:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand 3 "const_int_operand" "n")))))] -+ "UINTVAL (operands[3]) < 32 && -+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" -+ "extr\\t%w0, %w1, %w2, %4" -+ [(set_attr "type" "shift_imm")] -+) ++static void ++aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code, ++ machine_mode mode, rtx dst, rtx src, ++ rtx mem, rtx model) ++{ ++ typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx); ++ const aarch64_atomic_load_op_fn plus[] = ++ { ++ gen_aarch64_atomic_loadaddqi, ++ gen_aarch64_atomic_loadaddhi, ++ gen_aarch64_atomic_loadaddsi, ++ gen_aarch64_atomic_loadadddi ++ }; ++ const aarch64_atomic_load_op_fn eor[] = ++ { ++ gen_aarch64_atomic_loadeorqi, ++ gen_aarch64_atomic_loadeorhi, ++ gen_aarch64_atomic_loadeorsi, ++ gen_aarch64_atomic_loadeordi ++ }; ++ const aarch64_atomic_load_op_fn ior[] = ++ { ++ gen_aarch64_atomic_loadsetqi, ++ gen_aarch64_atomic_loadsethi, ++ gen_aarch64_atomic_loadsetsi, ++ gen_aarch64_atomic_loadsetdi ++ }; ++ const aarch64_atomic_load_op_fn bic[] = ++ { ++ gen_aarch64_atomic_loadclrqi, ++ gen_aarch64_atomic_loadclrhi, ++ gen_aarch64_atomic_loadclrsi, ++ gen_aarch64_atomic_loadclrdi ++ }; ++ aarch64_atomic_load_op_fn gen; ++ int idx = 0; + - (define_insn "*ror3_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (rotate:GPI (match_operand:GPI 1 "register_operand" "r") ---- a/src/gcc/config/aarch64/arm_neon.h -+++ b/src/gcc/config/aarch64/arm_neon.h -@@ -5665,8 +5665,6 @@ vaddlvq_u32 (uint32x4_t a) - - /* vcvt_high_f32_f16 not supported */ - --static float32x2_t vdup_n_f32 (float32_t); -- - #define vcvt_n_f32_s32(a, b) \ - __extension__ \ - ({ \ -@@ -9824,272 +9822,6 @@ vrsqrtss_f32 (float32_t a, float32_t b) - result; \ - }) - --#define vst1_lane_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x2_t b_ = (b); \ -- float32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x1_t b_ = (b); \ -- float64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x8_t b_ = (b); \ -- poly8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x4_t b_ = (b); \ -- poly16_t * a_ = (a); \ -- __asm__ ("st1 
{%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x8_t b_ = (b); \ -- int8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x4_t b_ = (b); \ -- int16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x2_t b_ = (b); \ -- int32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x1_t b_ = (b); \ -- int64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) ++ switch (mode) ++ { ++ case QImode: idx = 0; break; ++ case HImode: idx = 1; break; ++ case SImode: idx = 2; break; ++ case DImode: idx = 3; break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (code) ++ { ++ case AARCH64_LDOP_PLUS: gen = plus[idx]; break; ++ case AARCH64_LDOP_XOR: gen = eor[idx]; break; ++ case AARCH64_LDOP_OR: gen = ior[idx]; break; ++ case AARCH64_LDOP_BIC: gen = bic[idx]; break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ emit_insn (gen (dst, mem, src, model)); ++} ++ ++/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the ++ location to store the data read from memory. OUT_RESULT is the location to ++ store the result of the operation. MEM is the memory location to read and ++ modify. MODEL_RTX is the memory ordering to use. VALUE is the second ++ operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can ++ be NULL. */ ++ ++void ++aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result, ++ rtx mem, rtx value, rtx model_rtx) ++{ ++ machine_mode mode = GET_MODE (mem); ++ machine_mode wmode = (mode == DImode ? DImode : SImode); ++ const bool short_mode = (mode < SImode); ++ aarch64_atomic_load_op_code ldop_code; ++ rtx src; ++ rtx x; ++ ++ if (out_data) ++ out_data = gen_lowpart (mode, out_data); ++ ++ if (out_result) ++ out_result = gen_lowpart (mode, out_result); ++ ++ /* Make sure the value is in a register, putting it into a destination ++ register if it needs to be manipulated. */ ++ if (!register_operand (value, mode) ++ || code == AND || code == MINUS) ++ { ++ src = out_result ? out_result : out_data; ++ emit_move_insn (src, gen_lowpart (mode, value)); ++ } ++ else ++ src = value; ++ gcc_assert (register_operand (src, mode)); ++ ++ /* Preprocess the data for the operation as necessary. If the operation is ++ a SET then emit a swap instruction and finish. */ ++ switch (code) ++ { ++ case SET: ++ aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx); ++ return; ++ ++ case MINUS: ++ /* Negate the value and treat it as a PLUS. */ ++ { ++ rtx neg_src; ++ ++ /* Resize the value if necessary. */ ++ if (short_mode) ++ src = gen_lowpart (wmode, src); ++ ++ neg_src = gen_rtx_NEG (wmode, src); ++ emit_insn (gen_rtx_SET (VOIDmode, src, neg_src)); ++ ++ if (short_mode) ++ src = gen_lowpart (mode, src); ++ } ++ /* Fall-through. */ ++ case PLUS: ++ ldop_code = AARCH64_LDOP_PLUS; ++ break; ++ ++ case IOR: ++ ldop_code = AARCH64_LDOP_OR; ++ break; ++ ++ case XOR: ++ ldop_code = AARCH64_LDOP_XOR; ++ break; ++ ++ case AND: ++ { ++ rtx not_src; ++ ++ /* Resize the value if necessary. 
*/ ++ if (short_mode) ++ src = gen_lowpart (wmode, src); ++ ++ not_src = gen_rtx_NOT (wmode, src); ++ emit_insn (gen_rtx_SET (VOIDmode, src, not_src)); ++ ++ if (short_mode) ++ src = gen_lowpart (mode, src); ++ } ++ ldop_code = AARCH64_LDOP_BIC; ++ break; ++ ++ default: ++ /* The operation can't be done with atomic instructions. */ ++ gcc_unreachable (); ++ } ++ ++ aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx); ++ ++ /* If necessary, calculate the data in memory after the update by redoing the ++ operation from values in registers. */ ++ if (!out_result) ++ return; ++ ++ if (short_mode) ++ { ++ src = gen_lowpart (wmode, src); ++ out_data = gen_lowpart (wmode, out_data); ++ out_result = gen_lowpart (wmode, out_result); ++ } ++ ++ x = NULL_RTX; ++ ++ switch (code) ++ { ++ case MINUS: ++ case PLUS: ++ x = gen_rtx_PLUS (wmode, out_data, src); ++ break; ++ case IOR: ++ x = gen_rtx_IOR (wmode, out_data, src); ++ break; ++ case XOR: ++ x = gen_rtx_XOR (wmode, out_data, src); ++ break; ++ case AND: ++ aarch64_emit_bic (wmode, out_result, out_data, src, 0); ++ return; ++ default: ++ gcc_unreachable (); ++ } ++ ++ emit_set_insn (out_result, x); ++ ++ return; ++} ++ + /* Split an atomic operation. */ + + void + aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, +- rtx value, rtx model_rtx, rtx cond) ++ rtx value, rtx model_rtx, rtx cond) + { + machine_mode mode = GET_MODE (mem); + machine_mode wmode = (mode == DImode ? DImode : SImode); +@@ -9176,6 +10112,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx_code_label *label; + rtx x; + ++ /* Split the atomic operation into a sequence. */ + label = gen_label_rtx (); + emit_label (label); + +@@ -10604,7 +11541,7 @@ aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code, + static bool + aarch64_macro_fusion_p (void) + { +- return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING; ++ return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING; + } + + +@@ -10624,7 +11561,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + return false; + + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK)) + { + /* We are trying to match: + prev (mov) == (set (reg r0) (const_int imm16)) +@@ -10649,7 +11586,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + } + + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD)) + { + + /* We're trying to match: +@@ -10675,7 +11612,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + } + + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK)) + { + + /* We're trying to match: +@@ -10704,7 +11641,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + + } + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR)) + { + /* We're trying to match: + prev (adrp) == (set (reg r0) +@@ -10735,7 +11672,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + } + } + +- if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH) ++ if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH) + && any_condjump_p (curr)) + { + enum attr_type prev_type = get_attr_type 
(prev); +--- a/src/gcc/config/aarch64/aarch64.h ++++ b/src/gcc/config/aarch64/aarch64.h +@@ -29,6 +29,10 @@ + builtin_define ("__aarch64__"); \ + builtin_define ("__ARM_64BIT_STATE"); \ + builtin_define_with_int_value \ ++ ("__ARM_ALIGN_MAX_PWR", 28); \ ++ builtin_define_with_int_value \ ++ ("__ARM_ALIGN_MAX_STACK_PWR", 16); \ ++ builtin_define_with_int_value \ + ("__ARM_ARCH", aarch64_architecture_version); \ + cpp_define_formatted \ + (parse_in, "__ARM_ARCH_%dA", aarch64_architecture_version); \ +@@ -198,8 +202,12 @@ extern unsigned aarch64_architecture_version; + #define AARCH64_FL_SIMD (1 << 0) /* Has SIMD instructions. */ + #define AARCH64_FL_FP (1 << 1) /* Has FP. */ + #define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ +-#define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */ +-#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */ ++#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ ++/* ARMv8.1 architecture extensions. */ ++#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ ++#define AARCH64_FL_PAN (1 << 5) /* Has Privileged Access Never. */ ++#define AARCH64_FL_LOR (1 << 6) /* Has Limited Ordering regions. */ ++#define AARCH64_FL_RDMA (1 << 7) /* Has ARMv8.1 Adv.SIMD. */ + + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) +@@ -209,6 +217,9 @@ extern unsigned aarch64_architecture_version; + + /* Architecture flags that effect instruction selection. */ + #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) ++#define AARCH64_FL_FOR_ARCH8_1 \ ++ (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_PAN \ ++ | AARCH64_FL_LOR | AARCH64_FL_RDMA) + + /* Macros to test ISA flags. */ + extern unsigned long aarch64_isa_flags; +@@ -216,10 +227,7 @@ extern unsigned long aarch64_isa_flags; + #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) + #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) +- +-/* Macros to test tuning flags. */ +-extern unsigned long aarch64_tune_flags; +-#define AARCH64_TUNE_SLOWMUL (aarch64_tune_flags & AARCH64_FL_SLOWMUL) ++#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + + /* Crypto is an optional extension to AdvSIMD. */ + #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) +@@ -227,6 +235,9 @@ extern unsigned long aarch64_tune_flags; + /* CRC instructions that can be enabled through +crc arch extension. */ + #define TARGET_CRC32 (AARCH64_ISA_CRC) + ++/* Atomic instructions that can be enabled through the +lse extension. */ ++#define TARGET_LSE (AARCH64_ISA_LSE) ++ + /* Standard register usage. */ + + /* 31 64-bit general purpose registers R0-R30: +@@ -506,7 +517,7 @@ enum reg_class + + enum target_cpus + { +-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \ ++#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ + TARGET_CPU_##INTERNAL_IDENT, + #include "aarch64-cores.def" + #undef AARCH64_CORE +@@ -823,7 +834,8 @@ do { \ + #define TRAMPOLINE_SECTION text_section + + /* To start with. */ +-#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2 ++#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \ ++ (aarch64_branch_cost (SPEED_P, PREDICTABLE_P)) + + + /* Assembly output. 
*/ +@@ -929,11 +941,24 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); + #define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ + { "rewrite_mcpu", aarch64_rewrite_mcpu }, + ++#if defined(__aarch64__) ++extern const char *host_detect_local_cpu (int argc, const char **argv); ++# define EXTRA_SPEC_FUNCTIONS \ ++ { "local_cpu_detect", host_detect_local_cpu }, \ ++ BIG_LITTLE_CPU_SPEC_FUNCTIONS ++ ++# define MCPU_MTUNE_NATIVE_SPECS \ ++ " %{march=native:%,neon_from_gp,neon_dup") + (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")] + ) +@@ -912,7 +912,7 @@ + DONE; + }" + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ +- adr,adr,f_mcr,f_mrc,fmov,fmov") ++ adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] + ) +@@ -979,23 +979,25 @@ + [(set (match_operand:GPF 0 "nonimmediate_operand" "") + (match_operand:GPF 1 "general_operand" ""))] + "" +- " ++ { + if (!TARGET_FLOAT) +- { +- sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); ++ { ++ aarch64_err_no_fpadvsimd (mode, "code"); + FAIL; +- } ++ } + +- if (GET_CODE (operands[0]) == MEM) ++ if (GET_CODE (operands[0]) == MEM ++ && ! (GET_CODE (operands[1]) == CONST_DOUBLE ++ && aarch64_float_const_zero_rtx_p (operands[1]))) + operands[1] = force_reg (mode, operands[1]); +- " ++ } + ) + + (define_insn "*movsf_aarch64" + [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], SFmode) +- || register_operand (operands[1], SFmode))" ++ || aarch64_reg_or_fp_zero (operands[1], SFmode))" + "@ + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 +@@ -1007,14 +1009,14 @@ + str\\t%w1, %0 + mov\\t%w0, %w1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ +- f_loads,f_stores,f_loads,f_stores,mov_reg")] ++ f_loads,f_stores,load1,store1,mov_reg")] + ) + + (define_insn "*movdf_aarch64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], DFmode) +- || register_operand (operands[1], DFmode))" ++ || aarch64_reg_or_fp_zero (operands[1], DFmode))" + "@ + fmov\\t%d0, %x1 + fmov\\t%x0, %d1 +@@ -1026,32 +1028,34 @@ + str\\t%x1, %0 + mov\\t%x0, %x1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\ +- f_loadd,f_stored,f_loadd,f_stored,mov_reg")] ++ f_loadd,f_stored,load1,store1,mov_reg")] + ) + + (define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "" +- " ++ { + if (!TARGET_FLOAT) +- { +- sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); ++ { ++ aarch64_err_no_fpadvsimd (TFmode, "code"); + FAIL; +- } ++ } + +- if (GET_CODE (operands[0]) == MEM) ++ if (GET_CODE (operands[0]) == MEM ++ && ! 
(GET_CODE (operands[1]) == CONST_DOUBLE ++ && aarch64_float_const_zero_rtx_p (operands[1]))) + operands[1] = force_reg (TFmode, operands[1]); +- " ++ } + ) + + (define_insn "*movtf_aarch64" + [(set (match_operand:TF 0 +- "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump") ++ "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump") + (match_operand:TF 1 +- "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,Ump,?rY"))] ++ "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))] + "TARGET_FLOAT && (register_operand (operands[0], TFmode) +- || register_operand (operands[1], TFmode))" ++ || aarch64_reg_or_fp_zero (operands[1], TFmode))" + "@ + orr\\t%0.16b, %1.16b, %1.16b + # +@@ -1062,12 +1066,13 @@ + ldr\\t%q0, %1 + str\\t%q1, %0 + ldp\\t%0, %H0, %1 +- stp\\t%1, %H1, %0" +- [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\ +- f_loadd,f_stored,neon_load1_2reg,neon_store1_2reg") +- (set_attr "length" "4,8,8,8,4,4,4,4,4,4") +- (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*") +- (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")] ++ stp\\t%1, %H1, %0 ++ stp\\txzr, xzr, %0" ++ [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,fconstd,\ ++ f_loadd,f_stored,load2,store2,store2") ++ (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") ++ (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*,*") ++ (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")] + ) + + (define_split +@@ -1414,18 +1419,28 @@ + " + if (! aarch64_plus_operand (operands[2], VOIDmode)) + { +- rtx subtarget = ((optimize && can_create_pseudo_p ()) +- ? gen_reg_rtx (mode) : operands[0]); + HOST_WIDE_INT imm = INTVAL (operands[2]); + +- if (imm < 0) +- imm = -(-imm & ~0xfff); ++ if (aarch64_move_imm (imm, mode) && can_create_pseudo_p ()) ++ { ++ rtx tmp = gen_reg_rtx (mode); ++ emit_move_insn (tmp, operands[2]); ++ operands[2] = tmp; ++ } + else +- imm &= ~0xfff; - --#define vst1_lane_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x4_t b_ = (b); \ -- uint16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x2_t b_ = (b); \ -- uint32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x1_t b_ = (b); \ -- uint64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- -- --#define vst1q_lane_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x4_t b_ = (b); \ -- float32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x2_t b_ = (b); \ -- float64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x16_t b_ = (b); \ -- poly8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x8_t b_ = (b); \ -- poly16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x16_t b_ = (b); \ -- int8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : 
"memory"); \ -- }) -- --#define vst1q_lane_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u8(a, b, c) \ -- __extension__ \ -- ({ \ -- uint8x16_t b_ = (b); \ -- uint8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- -- - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_p8 (poly8x8_t a, poly8x8_t b) - { -@@ -11668,25 +11400,25 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) - - /* vaes */ - --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaeseq_u8 (uint8x16_t data, uint8x16_t key) - { - return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); - } - --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaesdq_u8 (uint8x16_t data, uint8x16_t key) - { - return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); - } - --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaesmcq_u8 (uint8x16_t data) - { - return __builtin_aarch64_crypto_aesmcv16qi_uu (data); - } +- emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); +- operands[1] = subtarget; +- operands[2] = GEN_INT (INTVAL (operands[2]) - imm); ++ { ++ rtx subtarget = ((optimize && can_create_pseudo_p ()) ++ ? gen_reg_rtx (mode) : operands[0]); ++ ++ if (imm < 0) ++ imm = -(-imm & ~0xfff); ++ else ++ imm &= ~0xfff; ++ ++ emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); ++ operands[1] = subtarget; ++ operands[2] = GEN_INT (INTVAL (operands[2]) - imm); ++ } + } + " + ) +@@ -1529,6 +1544,38 @@ + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] + ) --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaesimcq_u8 (uint8x16_t data) - { - return __builtin_aarch64_crypto_aesimcv16qi_uu (data); -@@ -11887,7 +11619,7 @@ vceq_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceq_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] == __b[0] ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a == __b); - } ++(define_insn "*adds_shift_imm_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (plus:GPI (ASHIFT:GPI ++ (match_operand:GPI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_" "n")) ++ (match_operand:GPI 3 "register_operand" "r")) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=r") ++ (plus:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2)) ++ (match_dup 3)))] ++ "" ++ "adds\\t%0, %3, %1, %2" ++ [(set_attr "type" "alus_shift_imm")] ++) ++ ++(define_insn "*subs_shift_imm_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (minus:GPI (match_operand:GPI 1 "register_operand" "r") ++ (ASHIFT:GPI ++ (match_operand:GPI 2 "register_operand" "r") ++ (match_operand:QI 3 "aarch64_shift_imm_" "n"))) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=r") ++ (minus:GPI (match_dup 1) ++ (ASHIFT:GPI (match_dup 2) (match_dup 3))))] ++ "" ++ "subs\\t%0, %1, %2, %3" ++ [(set_attr "type" "alus_shift_imm")] ++) ++ + (define_insn "*adds_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ +@@ -1589,6 +1636,42 @@ + [(set_attr "type" "alus_ext")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -11911,7 +11643,7 @@ vceq_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceq_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll}; -+ return (__a == __b); - } ++(define_insn "*adds__shift_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (plus:GPI (ashift:GPI ++ (ANY_EXTEND:GPI ++ (match_operand:ALLX 1 "register_operand" "r")) ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand:GPI 3 "register_operand" "r")) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=rk") ++ (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1)) ++ (match_dup 2)) ++ (match_dup 3)))] ++ "" ++ "adds\\t%0, %3, %1, xt %2" ++ [(set_attr "type" "alus_ext")] ++) ++ ++(define_insn "*subs__shift_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (minus:GPI (match_operand:GPI 1 "register_operand" "r") ++ (ashift:GPI ++ (ANY_EXTEND:GPI ++ (match_operand:ALLX 2 "register_operand" "r")) ++ (match_operand 3 "aarch64_imm3" "Ui3"))) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=rk") ++ (minus:GPI (match_dup 1) ++ (ashift:GPI (ANY_EXTEND:GPI (match_dup 2)) ++ (match_dup 3))))] ++ "" ++ "subs\\t%0, %1, %2, xt %3" ++ [(set_attr "type" "alus_ext")] ++) ++ + (define_insn "*adds__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ +@@ -1884,6 +1967,38 @@ + [(set_attr "type" "adc_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12047,7 +11779,7 @@ vceqz_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceqz_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] == 0ll ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); - } ++(define_insn "*add_uxt_shift2" ++ [(set (match_operand:GPI 0 "register_operand" "=rk") ++ (plus:GPI (and:GPI ++ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n")) ++ (match_operand:GPI 4 "register_operand" "r")))] ++ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL(operands[2]), ++ INTVAL (operands[3]))); ++ return \"add\t%0, %4, %1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ ++;; zero_extend version of above ++(define_insn "*add_uxtsi_shift2_uxtw" ++ [(set (match_operand:DI 0 "register_operand" "=rk") ++ (zero_extend:DI ++ (plus:SI (and:SI ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n")) ++ (match_operand:SI 4 "register_operand" "r"))))] ++ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), ++ INTVAL (operands[3]))); ++ return \"add\t%w0, %w4, %w1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ + (define_insn "*add_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (and:GPI +@@ -2140,6 +2255,38 @@ + [(set_attr "type" "adc_reg")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12071,7 +11803,7 @@ vceqz_u32 (uint32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceqz_u64 (uint64x1_t __a) - { -- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll}; -+ return (__a == __AARCH64_UINT64_C (0)); - } ++(define_insn "*sub_uxt_shift2" ++ [(set (match_operand:GPI 0 "register_operand" "=rk") ++ (minus:GPI (match_operand:GPI 4 "register_operand" "rk") ++ (and:GPI ++ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n"))))] ++ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), ++ INTVAL (operands[3]))); ++ return \"sub\t%0, %4, %1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ ++;; zero_extend version of above ++(define_insn "*sub_uxtsi_shift2_uxtw" ++ [(set (match_operand:DI 0 "register_operand" "=rk") ++ (zero_extend:DI ++ (minus:SI (match_operand:SI 4 "register_operand" "rk") ++ (and:SI ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n")))))] ++ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), ++ INTVAL (operands[3]))); ++ return \"sub\t%w0, %w4, %w1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ + (define_insn "*sub_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 4 "register_operand" "rk") +@@ -2172,35 +2319,16 @@ + [(set_attr "type" "alu_ext")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12201,7 +11933,7 @@ vcge_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcge_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] >= __b[0] ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a >= __b); - } +-(define_insn_and_split "absdi2" +- [(set (match_operand:DI 0 "register_operand" "=&r,w") +- (abs:DI (match_operand:DI 1 "register_operand" "r,w")))] ++(define_expand "abs2" ++ [(match_operand:GPI 0 "register_operand" "") ++ (match_operand:GPI 1 "register_operand" "")] + "" +- "@ +- # +- abs\\t%d0, %d1" +- "reload_completed +- && GP_REGNUM_P (REGNO (operands[0])) +- && GP_REGNUM_P (REGNO (operands[1]))" +- [(const_int 0)] + { +- emit_insn (gen_rtx_SET (VOIDmode, operands[0], +- gen_rtx_XOR (DImode, +- gen_rtx_ASHIFTRT (DImode, +- operands[1], +- GEN_INT (63)), +- operands[1]))); +- emit_insn (gen_rtx_SET (VOIDmode, +- operands[0], +- gen_rtx_MINUS (DImode, +- operands[0], +- gen_rtx_ASHIFTRT (DImode, +- operands[1], +- GEN_INT (63))))); ++ rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); ++ rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx); ++ emit_insn (gen_csneg3_insn (operands[0], x, operands[1], operands[1])); + DONE; + } +- [(set_attr "type" "alu_sreg") +- (set_attr "simd" "no,yes")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12225,7 +11957,7 @@ vcge_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcge_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll}; -+ return (__a >= __b); - } + (define_insn "neg2" +@@ -2852,7 +2980,7 @@ + (plus:GPI (match_operand 2 "aarch64_comparison_operation" "") + (match_operand:GPI 1 "register_operand" "r")))] + "" +- "csinc\\t%0, %1, %1, %M2" ++ "cinc\\t%0, %1, %m2" + [(set_attr "type" "csel")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12349,7 +12081,7 @@ vcgez_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgez_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll}; -+ return (uint64x1_t) (__a >= __AARCH64_INT64_C (0)); - } +@@ -2879,7 +3007,7 @@ + [(set_attr "type" "csel")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12443,7 +12175,7 @@ vcgt_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgt_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll); -+ return (uint64x1_t) (__a > __b); - } +-(define_insn "*csneg3_insn" ++(define_insn "csneg3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operand 1 "aarch64_comparison_operation" "") +@@ -3058,6 +3186,26 @@ + (set_attr "simd" "*,yes")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12467,7 +12199,7 @@ vcgt_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgt_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) (__a[0] > __b[0] ? 
-1ll : 0ll); -+ return (__a > __b); - } ++(define_insn "*_one_cmplsidi3_ze" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (NLOGICAL:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r"))))] ++ "" ++ "\\t%w0, %w2, %w1" ++ [(set_attr "type" "logic_reg")] ++) ++ ++(define_insn "*xor_one_cmplsidi3_ze" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))))] ++ "" ++ "eon\\t%w0, %w1, %w2" ++ [(set_attr "type" "logic_reg")] ++) ++ + ;; (xor (not a) b) is simplify_rtx-ed down to (not (xor a b)). + ;; eon does not operate on SIMD registers so the vector variant must be split. + (define_insn_and_split "*xor_one_cmpl3" +@@ -3119,7 +3267,7 @@ + [(set_attr "type" "logics_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12591,7 +12323,7 @@ vcgtz_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgtz_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll}; -+ return (uint64x1_t) (__a > __AARCH64_INT64_C (0)); - } +-(define_insn "*_one_cmpl_3" ++(define_insn "_one_cmpl_3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (not:GPI + (SHIFT:GPI +@@ -3128,7 +3276,33 @@ + (match_operand:GPI 3 "register_operand" "r")))] + "" + "\\t%0, %3, %1, %2" +- [(set_attr "type" "logics_shift_imm")] ++ [(set_attr "type" "logic_shift_imm")] ++) ++ ++(define_insn "*eor_one_cmpl_3_alt" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (not:GPI (xor:GPI ++ (SHIFT:GPI ++ (match_operand:GPI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_" "n")) ++ (match_operand:GPI 3 "register_operand" "r"))))] ++ "" ++ "eon\\t%0, %3, %1, %2" ++ [(set_attr "type" "logic_shift_imm")] ++) ++ ++;; Zero-extend version of the above. ++(define_insn "*eor_one_cmpl_sidi3_alt_ze" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (not:SI (xor:SI ++ (SHIFT:SI ++ (match_operand:SI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) ++ (match_operand:SI 3 "register_operand" "r")))))] ++ "" ++ "eon\\t%w0, %w3, %w1, %2" ++ [(set_attr "type" "logic_shift_imm")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12685,7 +12417,7 @@ vcle_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcle_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll}; -+ return (uint64x1_t) (__a <= __b); - } + (define_insn "*and_one_cmpl_3_compare0" +@@ -3347,32 +3521,33 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12709,7 +12441,7 @@ vcle_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcle_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] <= __b[0] ? 
-1ll : 0ll}; -+ return (__a <= __b); - } + ;; Logical left shift using SISD or Integer instruction + (define_insn "*aarch64_ashl_sisd_or_int_3" +- [(set (match_operand:GPI 0 "register_operand" "=w,w,r") ++ [(set (match_operand:GPI 0 "register_operand" "=r,w,w") + (ashift:GPI +- (match_operand:GPI 1 "register_operand" "w,w,r") +- (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] ++ (match_operand:GPI 1 "register_operand" "r,w,w") ++ (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs,Us,w")))] + "" + "@ ++ lsl\t%0, %1, %2 + shl\t%0, %1, %2 +- ushl\t%0, %1, %2 +- lsl\t%0, %1, %2" +- [(set_attr "simd" "yes,yes,no") +- (set_attr "type" "neon_shift_imm, neon_shift_reg,shift_reg")] ++ ushl\t%0, %1, %2" ++ [(set_attr "simd" "no,yes,yes") ++ (set_attr "type" "shift_reg,neon_shift_imm, neon_shift_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12833,7 +12565,7 @@ vclez_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vclez_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll}; -+ return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12927,7 +12659,7 @@ vclt_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vclt_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; -+ return (uint64x1_t) (__a < __b); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12951,7 +12683,7 @@ vclt_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vclt_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; -+ return (__a < __b); - } + ;; Logical right shift using SISD or Integer instruction + (define_insn "*aarch64_lshr_sisd_or_int_3" +- [(set (match_operand:GPI 0 "register_operand" "=w,&w,r") ++ [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") + (lshiftrt:GPI +- (match_operand:GPI 1 "register_operand" "w,w,r") +- (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] ++ (match_operand:GPI 1 "register_operand" "r,w,w,w") ++ (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs,Us,w,0")))] + "" + "@ ++ lsr\t%0, %1, %2 + ushr\t%0, %1, %2 + # +- lsr\t%0, %1, %2" +- [(set_attr "simd" "yes,yes,no") +- (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] ++ #" ++ [(set_attr "simd" "no,yes,yes,yes") ++ (set_attr "type" "shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -13075,7 +12807,7 @@ vcltz_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcltz_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] < 0ll ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); - } + (define_split +@@ -3407,18 +3582,18 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -21321,72 +21053,74 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) + ;; Arithmetic right shift using SISD or Integer instruction + (define_insn "*aarch64_ashr_sisd_or_int_3" +- [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r") ++ [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") + (ashiftrt:GPI +- (match_operand:GPI 1 "register_operand" "w,w,w,r") +- (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,0,rUs")))] ++ (match_operand:GPI 1 "register_operand" "r,w,w,w") ++ (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "rUs,Us,w,0")))] + "" + "@ ++ asr\t%0, %1, %2 + sshr\t%0, %1, %2 + # +- # +- asr\t%0, %1, %2" +- [(set_attr "simd" "yes,yes,yes,no") +- (set_attr "type" "neon_shift_imm,neon_shift_reg,neon_shift_reg,shift_reg")] ++ #" ++ [(set_attr "simd" "no,yes,yes,yes") ++ (set_attr "type" "shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] + ) - /* vsha1 */ + (define_split +@@ -3551,6 +3726,21 @@ + [(set_attr "type" "shift_imm")] + ) --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); - } --static __inline uint32x4_t -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); - } --static __inline uint32x4_t ++;; There are no canonicalisation rules for ashift and lshiftrt inside an ior ++;; so we have to match both orderings. 
++(define_insn "*extr5_insn_alt" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (ior:GPI (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r") ++ (match_operand 4 "const_int_operand" "n")) ++ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") ++ (match_operand 3 "const_int_operand" "n"))))] ++ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (mode) ++ && (UINTVAL (operands[3]) + UINTVAL (operands[4]) ++ == GET_MODE_BITSIZE (mode))" ++ "extr\\t%0, %1, %2, %4" ++ [(set_attr "type" "shift_imm")] ++) + -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); - } - --static __inline uint32_t -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vsha1h_u32 (uint32_t hash_e) - { - return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) - { - return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) - { - return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) - { - return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) - { - return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); - } - --static __inline poly128_t -+__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) - vmull_p64 (poly64_t a, poly64_t b) - { - return - __builtin_aarch64_crypto_pmulldi_ppp (a, b); - } - --static __inline poly128_t -+__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) - vmull_high_p64 (poly64x2_t a, poly64x2_t b) - { - return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); -@@ -22302,6 +22036,8 @@ vst1_u64 (uint64_t *a, uint64x1_t b) - *a = b[0]; - } + ;; zero_extend version of the above + (define_insn "*extrsi5_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") +@@ -3565,6 +3755,19 @@ + [(set_attr "type" "shift_imm")] + ) -+/* vst1q */ ++(define_insn "*extrsi5_insn_uxtw_alt" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (ior:SI (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") ++ (match_operand 4 "const_int_operand" "n")) ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 3 "const_int_operand" "n")))))] ++ "UINTVAL 
(operands[3]) < 32 && ++ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" ++ "extr\\t%w0, %w1, %w2, %4" ++ [(set_attr "type" "shift_imm")] ++) + - __extension__ static __inline void __attribute__ ((__always_inline__)) - vst1q_f32 (float32_t *a, float32x4_t b) - { -@@ -22314,8 +22050,6 @@ vst1q_f64 (float64_t *a, float64x2_t b) - __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); - } + (define_insn "*ror3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (rotate:GPI (match_operand:GPI 1 "register_operand" "r") +@@ -3915,7 +4118,7 @@ + (define_insn "2" + [(set (match_operand:GPF 0 "register_operand" "=w,w") + (FLOATUORS:GPF (match_operand: 1 "register_operand" "w,r")))] +- "" ++ "TARGET_FLOAT" + "@ + cvtf\t%0, %1 + cvtf\t%0, %1" +--- a/src/gcc/config/aarch64/aarch64.opt ++++ b/src/gcc/config/aarch64/aarch64.opt +@@ -111,6 +111,10 @@ mabi= + Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI_DEFAULT) + -mabi=ABI Generate code that conforms to the specified ABI + ++moverride= ++Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) ++-moverride=STRING Power users only! Override CPU optimization parameters ++ + Enum + Name(aarch64_abi) Type(int) + Known AArch64 ABIs (for use with the -mabi= option): +--- a/src/gcc/config/aarch64/arm_neon.h ++++ b/src/gcc/config/aarch64/arm_neon.h +@@ -5665,8 +5665,6 @@ vaddlvq_u32 (uint32x4_t a) --/* vst1q */ + /* vcvt_high_f32_f16 not supported */ + +-static float32x2_t vdup_n_f32 (float32_t); - - __extension__ static __inline void __attribute__ ((__always_inline__)) - vst1q_p8 (poly8_t *a, poly8x16_t b) - { -@@ -22382,6 +22116,154 @@ vst1q_u64 (uint64_t *a, uint64x2_t b) - (int64x2_t) b); - } + #define vcvt_n_f32_s32(a, b) \ + __extension__ \ + ({ \ +@@ -9824,272 +9822,6 @@ vrsqrtss_f32 (float32_t a, float32_t b) + result; \ + }) -+/* vst1_lane */ -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) 
-+vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane) -+{ +-#define vst1_lane_f32(a, b, c) \ +- __extension__ \ +- ({ \ +- float32x2_t b_ = (b); \ +- float32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_f64(a, b, c) \ +- __extension__ \ +- ({ \ +- float64x1_t b_ = (b); \ +- float64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_p8(a, b, c) \ +- __extension__ \ +- ({ \ +- poly8x8_t b_ = (b); \ +- poly8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_p16(a, b, c) \ +- __extension__ \ +- ({ \ +- poly16x4_t b_ = (b); \ +- poly16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s8(a, b, c) \ +- __extension__ \ +- ({ \ +- int8x8_t b_ = (b); \ +- int8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s16(a, b, c) \ +- __extension__ \ +- ({ \ +- int16x4_t b_ = (b); \ +- int16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s32(a, b, c) \ +- __extension__ \ +- ({ \ +- int32x2_t b_ = (b); \ +- int32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s64(a, b, c) \ +- __extension__ \ +- ({ \ +- int64x1_t b_ = (b); \ +- int64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u8(a, b, c) \ +- __extension__ \ +- ({ \ +- uint8x8_t b_ = (b); \ +- uint8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u16(a, b, c) \ +- __extension__ \ +- ({ \ +- uint16x4_t b_ = (b); \ +- uint16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u32(a, b, c) \ +- __extension__ \ +- ({ \ +- uint32x2_t b_ = (b); \ +- uint32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u64(a, b, c) \ +- __extension__ \ +- ({ \ +- uint64x1_t b_ = (b); \ +- uint64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +- +-#define vst1q_lane_f32(a, b, c) \ +- __extension__ \ +- ({ \ +- float32x4_t b_ = (b); \ +- float32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_f64(a, b, c) \ +- __extension__ \ +- ({ \ +- float64x2_t b_ = (b); \ 
+- float64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_p8(a, b, c) \ +- __extension__ \ +- ({ \ +- poly8x16_t b_ = (b); \ +- poly8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_p16(a, b, c) \ +- __extension__ \ +- ({ \ +- poly16x8_t b_ = (b); \ +- poly16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s8(a, b, c) \ +- __extension__ \ +- ({ \ +- int8x16_t b_ = (b); \ +- int8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s16(a, b, c) \ +- __extension__ \ +- ({ \ +- int16x8_t b_ = (b); \ +- int16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s32(a, b, c) \ +- __extension__ \ +- ({ \ +- int32x4_t b_ = (b); \ +- int32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s64(a, b, c) \ +- __extension__ \ +- ({ \ +- int64x2_t b_ = (b); \ +- int64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u8(a, b, c) \ +- __extension__ \ +- ({ \ +- uint8x16_t b_ = (b); \ +- uint8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u16(a, b, c) \ +- __extension__ \ +- ({ \ +- uint16x8_t b_ = (b); \ +- uint16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u32(a, b, c) \ +- __extension__ \ +- ({ \ +- uint32x4_t b_ = (b); \ +- uint32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u64(a, b, c) \ +- __extension__ \ +- ({ \ +- uint64x2_t b_ = (b); \ +- uint64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +- + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vtst_p8 (poly8x8_t a, poly8x8_t b) + { +@@ -10218,8 +9950,8 @@ __STRUCTN (float, 64, 4) + #undef __STRUCTN + + +-#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ +- mode, ptr_mode, funcsuffix, signedtype) \ ++#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ ++ qmode, ptr_mode, funcsuffix, signedtype) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ +@@ -10233,31 +9965,37 @@ vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[0], 0); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[1], 1); \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ + } + +-__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, ++__ST2_LANE_FUNC 
(float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +-__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, ++__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, + float64x2_t) +-__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) +-__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, ++__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, ++ int8x16_t) ++__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, + int16x8_t) +-__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) +-__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) +-__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) +-__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) +-__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) +-__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, ++__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, ++ int8x16_t) ++__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, ++ int16x8_t) ++__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, ++ int32x4_t) ++__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, ++ int64x2_t) ++__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, ++ int8x16_t) ++__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, + int16x8_t) +-__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, ++__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, + int32x4_t) +-__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, ++__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, + int64x2_t) + + #undef __ST2_LANE_FUNC +@@ -10286,8 +10024,8 @@ __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) + __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) + __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) + +-#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ +- mode, ptr_mode, funcsuffix, signedtype) \ ++#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ ++ qmode, ptr_mode, funcsuffix, signedtype) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ +@@ -10304,33 +10042,39 @@ vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[0], 0); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[1], 1); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[2], 2); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[2], 2); \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ + } + +-__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, ++__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +-__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, 
++__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, + float64x2_t) +-__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) +-__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, ++__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, ++ int8x16_t) ++__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, ++ int16x8_t) ++__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, ++ int8x16_t) ++__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) +-__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) +-__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) +-__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) +-__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) +-__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, ++__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, ++ int32x4_t) ++__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, ++ int64x2_t) ++__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, ++ int8x16_t) ++__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, + int16x8_t) +-__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, ++__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, + int32x4_t) +-__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, ++__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, + int64x2_t) + + #undef __ST3_LANE_FUNC +@@ -10359,8 +10103,8 @@ __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) + __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) + __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) + +-#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ +- mode, ptr_mode, funcsuffix, signedtype) \ ++#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ ++ qmode, ptr_mode, funcsuffix, signedtype) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ +@@ -10380,35 +10124,41 @@ vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ + __temp.val[3] \ + = vcombine_##funcsuffix (__b.val[3], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[0], 0); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[1], 1); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[2], 2); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[3], 3); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[2], 2); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[3], 3); \ + __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ + } + +-__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, ++__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +-__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, 
v2df, df, f64, ++__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, + float64x2_t) +-__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) +-__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, ++__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, ++ int8x16_t) ++__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, + int16x8_t) +-__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) +-__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) +-__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) +-__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) +-__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) +-__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, ++__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, ++ int8x16_t) ++__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, ++__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, ++__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, ++ int64x2_t) ++__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, ++ int8x16_t) ++__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, ++ int16x8_t) ++__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, ++ int32x4_t) ++__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, + int64x2_t) + + #undef __ST4_LANE_FUNC +@@ -11668,25 +11418,25 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) + + /* vaes */ + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaeseq_u8 (uint8x16_t data, uint8x16_t key) + { + return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); + } + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaesdq_u8 (uint8x16_t data, uint8x16_t key) + { + return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); + } + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaesmcq_u8 (uint8x16_t data) + { + return __builtin_aarch64_crypto_aesmcv16qi_uu (data); + } + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaesimcq_u8 (uint8x16_t data) + { + return __builtin_aarch64_crypto_aesimcv16qi_uu (data); +@@ -11887,7 +11637,7 @@ vceq_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceq_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll}; ++ return (uint64x1_t) (__a == __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -11911,7 +11661,7 @@ vceq_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceq_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] == __b[0] ? 
-1ll : 0ll}; ++ return (__a == __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12047,7 +11797,7 @@ vceqz_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceqz_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12071,7 +11821,7 @@ vceqz_u32 (uint32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceqz_u64 (uint64x1_t __a) + { +- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll}; ++ return (__a == __AARCH64_UINT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12201,7 +11951,7 @@ vcge_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcge_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll}; ++ return (uint64x1_t) (__a >= __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12225,7 +11975,7 @@ vcge_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcge_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll}; ++ return (__a >= __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12349,7 +12099,7 @@ vcgez_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgez_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a >= __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12443,7 +12193,7 @@ vcgt_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgt_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll); ++ return (uint64x1_t) (__a > __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12467,7 +12217,7 @@ vcgt_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgt_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll); ++ return (__a > __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12591,7 +12341,7 @@ vcgtz_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgtz_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a > __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12685,7 +12435,7 @@ vcle_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcle_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] <= __b[0] ? 
-1ll : 0ll}; ++ return (uint64x1_t) (__a <= __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12709,7 +12459,7 @@ vcle_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcle_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll}; ++ return (__a <= __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12833,7 +12583,7 @@ vclez_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vclez_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12927,7 +12677,7 @@ vclt_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vclt_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; ++ return (uint64x1_t) (__a < __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12951,7 +12701,7 @@ vclt_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vclt_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; ++ return (__a < __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -13075,7 +12825,7 @@ vcltz_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcltz_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -17067,8 +16817,8 @@ vld4q_dup_f64 (const float64_t * __a) + + /* vld2_lane */ + +-#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, \ +- mode, ptrmode, funcsuffix, signedtype) \ ++#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ ++ qmode, ptrmode, funcsuffix, signedtype) \ + __extension__ static __inline intype __attribute__ ((__always_inline__)) \ + vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + { \ +@@ -17078,12 +16828,12 @@ vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ + __temp.val[1] = \ + vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[0], \ +- 0); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[1], \ +- 1); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[0], \ ++ 0); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[1], \ ++ 1); \ + __o = __builtin_aarch64_ld2_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \ +@@ -17091,29 +16841,29 @@ vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + return __b; \ + } + +-__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v4sf, ++__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +-__LD2_LANE_FUNC (float64x1x2_t, 
float64x1_t, float64x2x2_t, float64_t, v2df, ++__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df, + df, f64, float64x2_t) +-__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, ++__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +-__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v8hi, hi, ++__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +-__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v16qi, qi, s8, ++__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +-__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v8hi, hi, s16, ++__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v4si, si, s32, ++__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, v2di, di, s64, ++__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64, + int64x2_t) +-__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, ++__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +-__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v8hi, hi, ++__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +-__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v4si, si, ++__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +-__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, v2di, di, ++__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, + u64, int64x2_t) + + #undef __LD2_LANE_FUNC +@@ -17152,8 +16902,8 @@ __LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) + + /* vld3_lane */ + +-#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, \ +- mode, ptrmode, funcsuffix, signedtype) \ ++#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ ++ qmode, ptrmode, funcsuffix, signedtype) \ + __extension__ static __inline intype __attribute__ ((__always_inline__)) \ + vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + { \ +@@ -17165,15 +16915,15 @@ vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ + __temp.val[2] = \ + vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[0], \ +- 0); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[1], \ +- 1); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[2], \ +- 2); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[0], \ ++ 0); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[1], \ ++ 1); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[2], \ ++ 2); \ + __o = __builtin_aarch64_ld3_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \ +@@ -17182,29 +16932,29 @@ vld3_lane_##funcsuffix (const ptrtype * __ptr, intype 
__b, const int __c) \ + return __b; \ + } + +-__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v4sf, ++__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +-__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, v2df, ++__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df, + df, f64, float64x2_t) +-__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, ++__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +-__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v8hi, hi, ++__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +-__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v16qi, qi, s8, ++__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +-__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v8hi, hi, s16, ++__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v4si, si, s32, ++__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, v2di, di, s64, ++__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64, + int64x2_t) +-__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, ++__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +-__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v8hi, hi, ++__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +-__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v4si, si, ++__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +-__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, v2di, di, ++__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, + u64, int64x2_t) + + #undef __LD3_LANE_FUNC +@@ -17245,8 +16995,8 @@ __LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) + + /* vld4_lane */ + +-#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, \ +- mode, ptrmode, funcsuffix, signedtype) \ ++#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ ++ qmode, ptrmode, funcsuffix, signedtype) \ + __extension__ static __inline intype __attribute__ ((__always_inline__)) \ + vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + { \ +@@ -17260,18 +17010,18 @@ vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ + __temp.val[3] = \ + vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[0], \ +- 0); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[1], \ +- 1); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[2], \ +- 2); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[3], \ +- 3); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[0], \ ++ 0); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[1], \ 
++ 1); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[2], \ ++ 2); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[3], \ ++ 3); \ + __o = __builtin_aarch64_ld4_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ +@@ -17283,29 +17033,29 @@ vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + + /* vld4q_lane */ + +-__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v4sf, ++__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +-__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, v2df, ++__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, + df, f64, float64x2_t) +-__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, ++__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +-__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v8hi, hi, ++__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +-__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v16qi, qi, s8, ++__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +-__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, ++__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v4si, si, s32, ++__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, v2di, di, s64, ++__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, + int64x2_t) +-__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, ++__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +-__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v8hi, hi, ++__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +-__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v4si, si, ++__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +-__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, v2di, di, ++__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, + u64, int64x2_t) + + #undef __LD4_LANE_FUNC +@@ -21321,72 +21071,74 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) + + /* vsha1 */ + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); + } +-static __inline uint32x4_t ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); + } +-static __inline uint32x4_t ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha1pv4si_uuuu 
(hash_abcd, hash_e, wk); + } + +-static __inline uint32_t ++__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) + vsha1h_u32 (uint32_t hash_e) + { + return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) + { + return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) + { + return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) + { + return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) + { + return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); + } + +-static __inline poly128_t ++__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) + vmull_p64 (poly64_t a, poly64_t b) + { + return + __builtin_aarch64_crypto_pmulldi_ppp (a, b); + } + +-static __inline poly128_t ++__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) + vmull_high_p64 (poly64x2_t a, poly64x2_t b) + { + return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); +@@ -22302,6 +22054,8 @@ vst1_u64 (uint64_t *a, uint64x1_t b) + *a = b[0]; + } + ++/* vst1q */ ++ + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1q_f32 (float32_t *a, float32x4_t b) + { +@@ -22314,8 +22068,6 @@ vst1q_f64 (float64_t *a, float64x2_t b) + __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); + } + +-/* vst1q */ +- + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1q_p8 (poly8_t *a, poly8x16_t b) + { +@@ -22382,6 +22134,154 @@ vst1q_u64 (uint64_t *a, uint64x2_t b) + (int64x2_t) b); + } + ++/* vst1_lane */ ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane) ++{ ++ *__a = 
__aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++/* vst1q_lane */ ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u16 (uint16_t *__a, uint16x8_t 
__b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane) ++{ + *__a = __aarch64_vget_lane_any (__b, __lane); +} + -+/* vst1q_lane */ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ + /* vstn */ + + __extension__ static __inline void +@@ -23887,7 +23787,7 @@ vtst_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vtst_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll}; ++ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -23911,7 +23811,7 @@ vtst_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vtst_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll}; ++ return ((__a & __b) != __AARCH64_UINT64_C (0)); + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +--- a/src/gcc/config/aarch64/atomics.md ++++ b/src/gcc/config/aarch64/atomics.md +@@ -26,9 +26,28 @@ + UNSPECV_STL ; Represent an atomic store or store-release. + UNSPECV_ATOMIC_CMPSW ; Represent an atomic compare swap. + UNSPECV_ATOMIC_EXCHG ; Represent an atomic exchange. ++ UNSPECV_ATOMIC_CAS ; Represent an atomic CAS. ++ UNSPECV_ATOMIC_SWP ; Represent an atomic SWP. + UNSPECV_ATOMIC_OP ; Represent an atomic operation. ++ UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation ++ UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or ++ UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic ++ UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor ++ UNSPECV_ATOMIC_LDOP_PLUS ; Represent an atomic load-add + ]) + ++;; Iterators for load-operate instructions. ++ ++(define_int_iterator ATOMIC_LDOP ++ [UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC ++ UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS]) ++ ++(define_int_attr atomic_ldop ++ [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr") ++ (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) ++ ++;; Instruction patterns. 
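The hunks that follow wire the new ARMv8.1 LSE instructions (SWP, CAS and the LD<op> load-operate family) into the existing expanders: each atomic_* expander emits the *_lse pattern only when TARGET_LSE holds and keeps the load-/store-exclusive sequence as the fallback, and the memory-model operand selects the instruction suffix (none for relaxed, "a" for acquire/consume, "l" for release, "al" otherwise). A minimal C sketch of the builtins these patterns serve, assuming a toolchain carrying this patch and -march=armv8.1-a; the instruction named in each comment is the expected selection under that suffix mapping, not a guarantee:

#include <stdint.h>

uint64_t
swap_relaxed (uint64_t *p, uint64_t v)
{
  /* atomic_exchange expander; relaxed model -> plain SWP.  */
  return __atomic_exchange_n (p, v, __ATOMIC_RELAXED);
}

uint32_t
fetch_or_acquire (uint32_t *p, uint32_t mask)
{
  /* atomic_fetch_<op> expander; IOR maps to LDSET, acquire -> LDSETA.  */
  return __atomic_fetch_or (p, mask, __ATOMIC_ACQUIRE);
}

int
cas_seq_cst (uint64_t *p, uint64_t *expected, uint64_t desired)
{
  /* compare-and-swap expander; seq_cst -> CASAL when TARGET_LSE.  */
  return __atomic_compare_exchange_n (p, expected, desired, 0,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}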
++ + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool out + (match_operand:ALLI 1 "register_operand" "") ;; val out +@@ -45,10 +64,10 @@ + } + ) + +-(define_insn_and_split "atomic_compare_and_swap_1" ++(define_insn_and_split "aarch64_compare_and_swap" + [(set (reg:CC CC_REGNUM) ;; bool out + (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) +- (set (match_operand:SI 0 "register_operand" "=&r") ;; val out ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; val out + (zero_extend:SI + (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory + (set (match_dup 1) +@@ -57,7 +76,7 @@ + (match_operand:SHORT 3 "register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s +- (match_operand:SI 6 "const_int_operand")] ;; mod_f ++ (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW)) + (clobber (match_scratch:SI 7 "=&r"))] + "" +@@ -70,17 +89,17 @@ + } + ) + +-(define_insn_and_split "atomic_compare_and_swap_1" ++(define_insn_and_split "aarch64_compare_and_swap" + [(set (reg:CC CC_REGNUM) ;; bool out + (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) + (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out +- (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory ++ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:GPI + [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect + (match_operand:GPI 3 "register_operand" "r") ;; desired +- (match_operand:SI 4 "const_int_operand") ;; is_weak +- (match_operand:SI 5 "const_int_operand") ;; mod_s ++ (match_operand:SI 4 "const_int_operand") ;; is_weak ++ (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW)) + (clobber (match_scratch:SI 7 "=&r"))] +@@ -94,7 +113,79 @@ + } + ) + +-(define_insn_and_split "atomic_exchange" ++(define_insn_and_split "aarch64_compare_and_swap_lse" ++ [(set (reg:CC CC_REGNUM) ;; bool out ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; val out ++ (zero_extend:SI ++ (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected ++ (match_operand:SHORT 3 "register_operand" "r") ;; desired ++ (match_operand:SI 4 "const_int_operand") ;; is_weak ++ (match_operand:SI 5 "const_int_operand") ;; mod_s ++ (match_operand:SI 6 "const_int_operand")] ;; mod_f ++ UNSPECV_ATOMIC_CMPSW))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_cas (operands[0], operands[1], ++ operands[2], operands[3], ++ operands[5]); ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_compare_and_swap_lse" ++ [(set (reg:CC CC_REGNUM) ;; bool out ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) ++ (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out ++ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory ++ (set (match_dup 1) ++ (unspec_volatile:GPI ++ [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect ++ (match_operand:GPI 3 "register_operand" "r") ;; desired ++ (match_operand:SI 4 "const_int_operand") ;; is_weak ++ (match_operand:SI 5 "const_int_operand") ;; mod_s ++ (match_operand:SI 6 "const_int_operand")] ;; mod_f ++ UNSPECV_ATOMIC_CMPSW))] ++ 
"TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_cas (operands[0], operands[1], ++ operands[2], operands[3], ++ operands[5]); ++ DONE; ++ } ++) ++ ++(define_expand "atomic_exchange" ++ [(match_operand:ALLI 0 "register_operand" "") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") ++ (match_operand:ALLI 2 "register_operand" "") ++ (match_operand:SI 3 "const_int_operand" "")] ++ "" ++ { ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ ++ /* Use an atomic SWP when available. */ ++ if (TARGET_LSE) ++ gen = gen_aarch64_atomic_exchange_lse; ++ else ++ gen = gen_aarch64_atomic_exchange; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); ++ ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_atomic_exchange" + [(set (match_operand:ALLI 0 "register_operand" "=&r") ;; output + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) +@@ -110,28 +201,87 @@ + [(const_int 0)] + { + aarch64_split_atomic_op (SET, operands[0], NULL, operands[1], +- operands[2], operands[3], operands[4]); ++ operands[2], operands[3], operands[4]); ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_atomic_exchange_lse" ++ [(set (match_operand:ALLI 0 "register_operand" "=&r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_operand:ALLI 2 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "")] ++ UNSPECV_ATOMIC_EXCHG))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1], ++ operands[2], operands[3]); ++ DONE; ++ } ++) ++ ++(define_expand "atomic_" ++ [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "") ++ (atomic_op:ALLI ++ (match_operand:ALLI 1 "" "") ++ (match_operand:SI 2 "const_int_operand"))] ++ "" ++ { ++ rtx (*gen) (rtx, rtx, rtx); ++ ++ /* Use an atomic load-operate instruction when possible. 
*/ ++ if (aarch64_atomic_ldop_supported_p ()) ++ gen = gen_aarch64_atomic__lse; ++ else ++ gen = gen_aarch64_atomic_; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2])); ++ + DONE; + } + ) + +-(define_insn_and_split "atomic_" ++(define_insn_and_split "aarch64_atomic_" ++ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") ++ (unspec_volatile:ALLI ++ [(atomic_op:ALLI (match_dup 0) ++ (match_operand:ALLI 1 "" "r")) ++ (match_operand:SI 2 "const_int_operand")] ++ UNSPECV_ATOMIC_OP)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:ALLI 3 "=&r")) ++ (clobber (match_scratch:SI 4 "=&r"))] ++ "" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_split_atomic_op (, NULL, operands[3], operands[0], ++ operands[1], operands[2], operands[4]); ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_atomic__lse" + [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") + (unspec_volatile:ALLI + [(atomic_op:ALLI (match_dup 0) + (match_operand:ALLI 1 "" "r")) +- (match_operand:SI 2 "const_int_operand")] ;; model ++ (match_operand:SI 2 "const_int_operand")] + UNSPECV_ATOMIC_OP)) +- (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:ALLI 3 "=&r")) +- (clobber (match_scratch:SI 4 "=&r"))] +- "" ++ (clobber (match_scratch:ALLI 3 "=&r"))] ++ "TARGET_LSE" + "#" + "&& reload_completed" + [(const_int 0)] + { +- aarch64_split_atomic_op (, NULL, operands[3], operands[0], +- operands[1], operands[2], operands[4]); ++ aarch64_gen_atomic_ldop (, operands[3], NULL, operands[0], ++ operands[1], operands[2]); + DONE; + } + ) +@@ -158,7 +308,30 @@ + } + ) + +-(define_insn_and_split "atomic_fetch_" ++;; Load-operate-store, returning the updated memory data. ++ ++(define_expand "atomic_fetch_" ++ [(match_operand:ALLI 0 "register_operand" "") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") ++ (atomic_op:ALLI ++ (match_operand:ALLI 2 "" "") ++ (match_operand:SI 3 "const_int_operand"))] ++ "" ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ ++ /* Use an atomic load-operate instruction when possible. */ ++ if (aarch64_atomic_ldop_supported_p ()) ++ gen = gen_aarch64_atomic_fetch__lse; ++ else ++ gen = gen_aarch64_atomic_fetch_; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); ++ ++ DONE; ++}) ++ ++(define_insn_and_split "aarch64_atomic_fetch_" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) + (set (match_dup 1) +@@ -181,6 +354,26 @@ + } + ) + ++(define_insn_and_split "aarch64_atomic_fetch__lse" ++ [(set (match_operand:ALLI 0 "register_operand" "=&r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(atomic_op:ALLI (match_dup 1) ++ (match_operand:ALLI 2 "" "r")) ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPECV_ATOMIC_LDOP))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_ldop (, operands[0], NULL, operands[1], ++ operands[2], operands[3]); ++ DONE; ++ } ++) ++ + (define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) +@@ -205,7 +398,31 @@ + } + ) + +-(define_insn_and_split "atomic__fetch" ++;; Load-operate-store, returning the original memory data. 
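Both load-operate expander families are reworked the same way: atomic_fetch_<op> (behind __atomic_fetch_add and friends, which return the value the memory held before the operation) above, and atomic_<op>_fetch (behind __atomic_add_fetch and friends, which return the updated value) below. Each now asks aarch64_atomic_ldop_supported_p whether the operation has a single-instruction LD<op> form and, if so, emits the _lse pattern through aarch64_gen_atomic_ldop; otherwise it falls back to the generic load-/store-exclusive split via aarch64_split_atomic_op. A short source-level sketch of the contrast between the two builtin forms (helper names are only for illustration):

#include <stdint.h>

static inline uint32_t
old_value (uint32_t *p)      /* pre-increment value: atomic_fetch_add.  */
{
  return __atomic_fetch_add (p, 1, __ATOMIC_RELAXED);
}

static inline uint32_t
new_value (uint32_t *p)      /* post-increment value: atomic_add_fetch.  */
{
  return __atomic_add_fetch (p, 1, __ATOMIC_RELAXED);
}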
++ ++(define_expand "atomic__fetch" ++ [(match_operand:ALLI 0 "register_operand" "") ++ (atomic_op:ALLI ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") ++ (match_operand:ALLI 2 "" "")) ++ (match_operand:SI 3 "const_int_operand")] ++ "" ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ rtx value = operands[2]; ++ ++ /* Use an atomic load-operate instruction when possible. */ ++ if (aarch64_atomic_ldop_supported_p ()) ++ gen = gen_aarch64_atomic__fetch_lse; ++ else ++ gen = gen_aarch64_atomic__fetch; ++ ++ emit_insn (gen (operands[0], operands[1], value, operands[3])); ++ ++ DONE; ++}) ++ ++(define_insn_and_split "aarch64_atomic__fetch" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (atomic_op:ALLI + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") +@@ -228,6 +445,29 @@ + } + ) + ++(define_insn_and_split "aarch64_atomic__fetch_lse" ++ [(set (match_operand:ALLI 0 "register_operand" "=&r") ++ (atomic_op:ALLI ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") ++ (match_operand:ALLI 2 "" "r"))) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_dup 1) ++ (match_dup 2) ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPECV_ATOMIC_LDOP)) ++ (clobber (match_scratch:ALLI 4 "=r"))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_ldop (, operands[4], operands[0], operands[1], ++ operands[2], operands[3]); ++ DONE; ++ } ++) ++ + (define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (not:ALLI +@@ -370,3 +610,100 @@ + return "dmb\\tish"; + } + ) ++ ++;; ARMv8.1 LSE instructions. ++ ++;; Atomic swap with memory. ++(define_insn "aarch64_atomic_swp" ++ [(set (match_operand:ALLI 0 "register_operand" "+&r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_operand:ALLI 2 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "")] ++ UNSPECV_ATOMIC_SWP))] ++ "TARGET_LSE && reload_completed" ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "swp\t%2, %0, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "swpa\t%2, %0, %1"; ++ else if (is_mm_release (model)) ++ return "swpl\t%2, %0, %1"; ++ else ++ return "swpal\t%2, %0, %1"; ++ }) ++ ++;; Atomic compare-and-swap: HI and smaller modes. ++ ++(define_insn "aarch64_atomic_cas" ++ [(set (match_operand:SI 0 "register_operand" "+&r") ;; out ++ (zero_extend:SI ++ (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory. ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(match_dup 0) ++ (match_operand:SHORT 2 "register_operand" "r") ;; value. ++ (match_operand:SI 3 "const_int_operand" "")] ;; model. ++ UNSPECV_ATOMIC_CAS))] ++ "TARGET_LSE && reload_completed" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "cas\t%0, %2, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "casa\t%0, %2, %1"; ++ else if (is_mm_release (model)) ++ return "casl\t%0, %2, %1"; ++ else ++ return "casal\t%0, %2, %1"; ++}) ++ ++;; Atomic compare-and-swap: SI and larger modes. ++ ++(define_insn "aarch64_atomic_cas" ++ [(set (match_operand:GPI 0 "register_operand" "+&r") ;; out ++ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory. ++ (set (match_dup 1) ++ (unspec_volatile:GPI ++ [(match_dup 0) ++ (match_operand:GPI 2 "register_operand" "r") ;; value. 
++ (match_operand:SI 3 "const_int_operand" "")] ;; model. ++ UNSPECV_ATOMIC_CAS))] ++ "TARGET_LSE && reload_completed" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "cas\t%0, %2, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "casa\t%0, %2, %1"; ++ else if (is_mm_release (model)) ++ return "casl\t%0, %2, %1"; ++ else ++ return "casal\t%0, %2, %1"; ++}) ++ ++;; Atomic load-op: Load data, operate, store result, keep data. ++ ++(define_insn "aarch64_atomic_load" ++ [(set (match_operand:ALLI 0 "register_operand" "=r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_dup 1) ++ (match_operand:ALLI 2 "register_operand") ++ (match_operand:SI 3 "const_int_operand")] ++ ATOMIC_LDOP))] ++ "TARGET_LSE && reload_completed" ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "ld\t%2, %0, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "lda\t%2, %0, %1"; ++ else if (is_mm_release (model)) ++ return "ldl\t%2, %0, %1"; ++ else ++ return "ldal\t%2, %0, %1"; ++ }) +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/cortex-a57-fma-steering.c +@@ -0,0 +1,1099 @@ ++/* FMA steering optimization pass for Cortex-A57. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "regs.h" ++#include "insn-config.h" ++#include "hard-reg-set.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfganal.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "insn-attr.h" ++#include "machmode.h" ++#include "recog.h" ++#include "output.h" ++#include "vec.h" ++#include "hash-map.h" ++#include "bitmap.h" ++#include "obstack.h" ++#include "df.h" ++#include "target.h" ++#include "rtl.h" ++#include "context.h" ++#include "tree-pass.h" ++#include "regrename.h" ++#include "cortex-a57-fma-steering.h" ++#include "aarch64-protos.h" ++ ++#include ++ ++/* For better performance, the destination of FMADD/FMSUB instructions should ++ have the same parity as their accumulator register if the accumulator ++ contains the result of a previous FMUL or FMADD/FMSUB instruction if ++ targetting Cortex-A57 processors. Performance is also increased by ++ otherwise keeping a good balance in the parity of the destination register ++ of FMUL or FMADD/FMSUB. ++ ++ This pass ensure that registers are renamed so that these conditions hold. ++ We reuse the existing register renaming facility from regrename.c to build ++ dependency chains and expose candidate registers for renaming. ++ ++ ++ The algorithm has three steps: ++ ++ First, the functions of the register renaming pass are called. 
These ++ analyze the instructions and produce a list of def/use chains of ++ instructions. ++ ++ Next, this information is used to build trees of multiply and ++ multiply-accumulate instructions. The roots of these trees are any ++ multiply, or any multiply-accumulate whose accumulator is not dependent on ++ a multiply or multiply-accumulate instruction. A child is added to the ++ tree where a dependency chain exists between the result of the parent ++ instruction and the accumulator operand of the child, as in the diagram ++ below: ++ ++ fmul s2, s0, s1 ++ / \ ++ fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2 ++ | ++ fmadd s3, s1, s1, s0 ++ ++ Trees made of a single instruction are permitted. ++ ++ Finally, renaming is performed. The parity of the destination register at ++ the root of a tree is checked against the current balance of multiply and ++ multiply-accumulate on each pipeline. If necessary, the root of a tree is ++ renamed, in which case the rest of the tree is then renamed to keep the same ++ parity in the destination registers of all instructions in the tree. */ ++ ++ ++ ++/* Forward declarations. */ ++class fma_node; ++class fma_root_node; ++class func_fma_steering; ++ ++/* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent ++ FMADD/FMSUB instructions form a graph. This is because alternatives can ++ make a register be set by several FMUL or FMADD/FMSUB instructions in ++ different basic blocks and because of loops. For ease of browsing, the ++ connected components of this graph are broken up into forests of trees. ++ Forests are represented by fma_forest objects, contained in the fma_forests ++ list. Using a separate object for the forests allows for a better use of ++ memory as there is some information that is global to each forest, such as ++ the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each ++ floating-point execution pipelines. */ ++ ++class fma_forest ++{ ++public: ++ fma_forest (func_fma_steering *, fma_root_node *, int); ++ ~fma_forest (); ++ ++ int get_id (); ++ std::list *get_roots (); ++ func_fma_steering *get_globals (); ++ int get_target_parity (); ++ void fma_node_created (fma_node *); ++ void merge_forest (fma_forest *); ++ void dump_info (); ++ void dispatch (); ++ ++private: ++ /* The list of roots that form this forest. */ ++ std::list *m_roots; ++ ++ /* Target parity the destination register of all FMUL and FMADD/FMSUB ++ instructions in this forest should have. */ ++ int m_target_parity; ++ ++ /* Link to the instance of func_fma_steering holding data related to the ++ FMA steering of the current function (cfun). */ ++ func_fma_steering *m_globals; ++ ++ /* Identifier for the forest (used for dumps). */ ++ int m_id; ++ ++ /* Total number of nodes in the forest (for statistics). */ ++ int m_nb_nodes; ++}; ++ ++class fma_node ++{ ++public: ++ fma_node (fma_node *parent, du_chain *chain); ++ ~fma_node (); ++ ++ bool root_p (); ++ fma_forest *get_forest (); ++ std::list *get_children (); ++ rtx_insn *get_insn (); ++ void add_child (fma_node *); ++ int get_parity (); ++ void set_head (du_head *); ++ void rename (fma_forest *); ++ void dump_info (fma_forest *); ++ ++protected: ++ /* Root node that lead to this node. */ ++ fma_root_node *m_root; ++ ++ /* The parent node of this node. If the node belong to a chain with several ++ parent nodes, the first one encountered in a depth-first search is chosen ++ as canonical parent. */ ++ fma_node *m_parent; ++ ++ /* The list of child nodes. 
If a chain contains several parent nodes, one is ++ chosen as canonical parent and the others will have no children. */ ++ std::list *m_children; ++ ++ /* The associated DU_HEAD chain that the insn represented by this object ++ is (one of) the root of. When a chain contains several roots, the non ++ canonical ones have this field set to NULL. */ ++ struct du_head *m_head; ++ ++ /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */ ++ rtx_insn *m_insn; ++}; ++ ++class fma_root_node : public fma_node ++{ ++public: ++ fma_root_node (func_fma_steering *, du_chain *, int); ++ ++ fma_forest *get_forest (); ++ void set_forest (fma_forest *); ++ void dump_info (fma_forest *); ++ ++private: ++ /* The forest this node belonged to when it was created. */ ++ fma_forest *m_forest; ++}; ++ ++/* Class holding all data and methods relative to the FMA steering of a given ++ function. The FMA steering pass could then run in parallel for different ++ functions. */ ++ ++class func_fma_steering ++{ ++public: ++ func_fma_steering (); ++ ~func_fma_steering (); ++ ++ int get_fpu_balance (); ++ void remove_forest (fma_forest *); ++ bool put_node (fma_node *); ++ void update_balance (int); ++ fma_node *get_fma_node (rtx_insn *); ++ void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p); ++ void execute_fma_steering (); ++ ++private: ++ void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *), ++ void (*) (fma_forest *, fma_node *), bool); ++ void analyze (); ++ void rename_fma_trees (); ++ ++ /* Mapping between FMUL or FMADD/FMSUB instructions and the associated ++ fma_node object. Used when analyzing an instruction that is a root of ++ a chain to find if such an object was created because this instruction ++ is also a use in another chain. */ ++ hash_map *m_insn_fma_head_map; ++ ++ /* A list of all the forests in a given function. */ ++ std::list m_fma_forests; ++ ++ /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU ++ pipelines: ++ < 0: more instruction dispatched to the first pipeline ++ == 0: perfect balance ++ > 0: more instruction dispatched to the second pipeline. */ ++ int m_fpu_balance; ++ ++ /* Identifier for the next forest created. */ ++ int m_next_forest_id; ++}; ++ ++/* Rename the register HEAD->regno in all the insns in the chain HEAD to any ++ register not in the set UNAVAILABLE. Adapted from rename_chains in ++ regrename.c. */ ++ ++static bool ++rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) ++{ ++ int best_new_reg; ++ int n_uses = 0; ++ struct du_chain *tmp; ++ int reg = head->regno; ++ enum reg_class super_class = NO_REGS; ++ ++ if (head->cannot_rename) ++ return false; ++ ++ if (fixed_regs[reg] || global_regs[reg] ++ || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM)) ++ return false; ++ ++ /* Iterate over elements in the chain in order to: ++ 1. Count number of uses, and narrow the set of registers we can ++ use for renaming. ++ 2. Compute the superunion of register classes in this chain. 
*/ ++ for (tmp = head->first; tmp; tmp = tmp->next_use) ++ { ++ if (DEBUG_INSN_P (tmp->insn)) ++ continue; ++ n_uses++; ++ IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); ++ super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; ++ } ++ ++ if (n_uses < 1) ++ return false; ++ ++ best_new_reg = find_rename_reg (head, super_class, unavailable, reg, ++ false); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Register %s in insn %d", reg_names[reg], ++ INSN_UID (head->first->insn)); ++ if (head->need_caller_save_reg) ++ fprintf (dump_file, " crosses a call"); ++ } ++ ++ if (best_new_reg == reg) ++ { ++ if (dump_file) ++ fprintf (dump_file, "; no available better choice\n"); ++ return false; ++ } ++ ++ if (regrename_do_replace (head, best_new_reg)) ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); ++ df_set_regs_ever_live (best_new_reg, true); ++ } ++ else ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renaming as %s failed\n", ++ reg_names[best_new_reg]); ++ return false; ++ } ++ return true; ++} ++ ++/* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */ ++ ++static bool ++is_fmac_op (enum attr_type t) ++{ ++ return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S); ++} ++ ++/* Return whether T is the attribute of a FMUL instruction. */ ++ ++static bool ++is_fmul_op (enum attr_type t) ++{ ++ return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S); ++} ++ ++/* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB ++ instruction. */ ++ ++static bool ++is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok) ++{ ++ enum attr_type t; ++ ++ if (!NONDEBUG_INSN_P (insn)) ++ return false; ++ ++ if (recog_memoized (insn) < 0) ++ return false; ++ ++ /* Only consider chain(s) this instruction is a root of if this is an FMUL or ++ FMADD/FMSUB instruction. This allows to avoid browsing chains of all ++ instructions for FMUL or FMADD/FMSUB in them. */ ++ t = get_attr_type (insn); ++ return is_fmac_op (t) || (fmul_ok && is_fmul_op (t)); ++} ++ ++ ++/* ++ * Class fma_forest method definitions. ++ */ ++ ++fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root, ++ int id) ++{ ++ memset (this, 0, sizeof (*this)); ++ this->m_globals = fma_steer; ++ this->m_roots = new std::list; ++ this->m_roots->push_back (fma_root); ++ this->m_id = id; ++} ++ ++fma_forest::~fma_forest () ++{ ++ delete this->m_roots; ++} ++ ++int ++fma_forest::get_id () ++{ ++ return this->m_id; ++} ++ ++std::list * ++fma_forest::get_roots () ++{ ++ return this->m_roots; ++} ++ ++func_fma_steering * ++fma_forest::get_globals () ++{ ++ return this->m_globals; ++} ++ ++int ++fma_forest::get_target_parity () ++{ ++ return this->m_target_parity; ++} ++ ++/* Act on the creation of NODE by updating statistics in FOREST and adding an ++ entry for it in the func_fma_steering hashmap. */ ++ ++void fma_forest::fma_node_created (fma_node *node) ++{ ++ bool created = !this->m_globals->put_node (node); ++ ++ gcc_assert (created); ++ this->m_nb_nodes++; ++} ++ ++/* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical ++ fma_forest object to represent both. */ ++ ++void ++fma_forest::merge_forest (fma_forest *other_forest) ++{ ++ std::list *other_roots; ++ std::list::iterator other_root_iter; ++ ++ if (this == other_forest) ++ return; ++ ++ other_roots = other_forest->m_roots; ++ ++ /* Update root nodes' pointer to forest. 
*/ ++ for (other_root_iter = other_roots->begin (); ++ other_root_iter != other_roots->end (); other_root_iter++) ++ (*other_root_iter)->set_forest (this); ++ ++ /* Remove other_forest from the list of forests and move its tree roots in ++ the list of tree roots of ref_forest. */ ++ this->m_globals->remove_forest (other_forest); ++ this->m_roots->splice (this->m_roots->begin (), *other_roots); ++ delete other_forest; ++ ++ this->m_nb_nodes += other_forest->m_nb_nodes; ++} ++ ++/* Dump information about the forest FOREST. */ ++ ++void ++fma_forest::dump_info () ++{ ++ gcc_assert (dump_file); ++ ++ fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id, ++ this->m_nb_nodes); ++} ++ ++/* Wrapper around fma_forest::dump_info for use as parameter of function ++ pointer type in func_fma_steering::dfs. */ ++ ++static void ++dump_forest_info (fma_forest *forest) ++{ ++ forest->dump_info (); ++} ++ ++/* Dispatch forest to the least utilized pipeline. */ ++ ++void ++fma_forest::dispatch () ++{ ++ this->m_target_parity = this->m_roots->front ()->get_parity (); ++ int fpu_balance = this->m_globals->get_fpu_balance (); ++ if (fpu_balance != 0) ++ this->m_target_parity = (fpu_balance < 0); ++ ++ if (dump_file) ++ fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id, ++ this->m_target_parity ? "odd" : "even"); ++} ++ ++/* Wrapper around fma_forest::dispatch for use as parameter of function pointer ++ type in func_fma_steering::dfs. */ ++ ++static void ++dispatch_forest (fma_forest *forest) ++{ ++ forest->dispatch (); ++} ++ ++fma_node::fma_node (fma_node *parent, du_chain *chain) ++{ ++ memset (this, 0, sizeof (*this)); ++ this->m_parent = parent; ++ this->m_children = new std::list; ++ this->m_insn = chain->insn; ++ /* root_p () cannot be used to check for root before root is set. */ ++ if (this->m_parent == this) ++ this->m_root = static_cast (parent); ++ else ++ { ++ this->m_root = parent->m_root; ++ this->get_forest ()->fma_node_created (this); ++ } ++} ++ ++fma_node::~fma_node () ++{ ++ delete this->m_children; ++} ++ ++std::list * ++fma_node::get_children () ++{ ++ return this->m_children; ++} ++ ++rtx_insn * ++fma_node::get_insn () ++{ ++ return this->m_insn; ++} ++ ++void ++fma_node::set_head (du_head *head) ++{ ++ gcc_assert (!this->m_head); ++ this->m_head = head; ++} ++ ++/* Add a child to this node in the list of children. */ ++ ++void ++fma_node::add_child (fma_node *child) ++{ ++ this->m_children->push_back (child); ++} ++ ++/* Return the parity of the destination register of the instruction represented ++ by this node. */ ++ ++int ++fma_node::get_parity () ++{ ++ return this->m_head->regno % 2; ++} ++ ++/* Get the actual forest associated with a non root node as the one the node ++ points to might have been merged into another one. In that case the pointer ++ in the root nodes are updated so we return the forest pointer of a root node ++ pointed to by the initial forest. Despite being a oneliner, this method is ++ defined here as it references a method from fma_root_node. */ ++ ++fma_forest * ++fma_node::get_forest () ++{ ++ return this->m_root->get_forest (); ++} ++ ++/* Return whether a node is a root node. */ ++ ++bool ++fma_node::root_p () ++{ ++ return this->m_root == this; ++} ++ ++/* Dump information about the children of node FMA_NODE in forest FOREST. 
*/ ++ ++void ++fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest) ++{ ++ struct du_chain *chain; ++ std::list::iterator fma_child; ++ ++ gcc_assert (dump_file); ++ ++ if (this->get_children ()->empty ()) ++ return; ++ ++ fprintf (dump_file, "Instruction(s)"); ++ for (chain = this->m_head->first; chain; chain = chain->next_use) ++ { ++ if (!is_fmul_fmac_insn (chain->insn, true)) ++ continue; ++ ++ if (chain->loc != &SET_DEST (PATTERN (chain->insn))) ++ continue; ++ ++ fprintf (dump_file, " %d", INSN_UID (chain->insn)); ++ } ++ ++ fprintf (dump_file, " is(are) accumulator dependency of instructions"); ++ for (fma_child = this->get_children ()->begin (); ++ fma_child != this->get_children ()->end (); fma_child++) ++ fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn)); ++ fprintf (dump_file, "\n"); ++} ++ ++/* Wrapper around fma_node::dump_info for use as parameter of function pointer ++ type in func_fma_steering::dfs. */ ++ ++static void ++dump_tree_node_info (fma_forest *forest, fma_node *node) ++{ ++ node->dump_info (forest); ++} ++ ++/* Rename the destination register of a single FMUL or FMADD/FMSUB instruction ++ represented by FMA_NODE to a register that respect the target parity for ++ FOREST or with same parity of the instruction represented by its parent node ++ if it has one. */ ++ ++void ++fma_node::rename (fma_forest *forest) ++{ ++ int cur_parity, target_parity; ++ ++ /* This is alternate root of a chain and thus has no children. It will be ++ renamed when processing the canonical root for that chain. */ ++ if (!this->m_head) ++ return; ++ ++ target_parity = forest->get_target_parity (); ++ if (this->m_parent) ++ target_parity = this->m_parent->get_parity (); ++ cur_parity = this->get_parity (); ++ ++ /* Rename if parity differs. */ ++ if (cur_parity != target_parity) ++ { ++ rtx_insn *insn = this->m_insn; ++ HARD_REG_SET unavailable; ++ enum machine_mode mode; ++ int reg; ++ ++ if (dump_file) ++ { ++ unsigned cur_dest_reg = this->m_head->regno; ++ ++ fprintf (dump_file, "FMA or FMUL at insn %d but destination " ++ "register (%s) has different parity from expected to " ++ "maximize FPU pipeline utilization\n", INSN_UID (insn), ++ reg_names[cur_dest_reg]); ++ } ++ ++ /* Don't clobber traceback for noreturn functions. */ ++ CLEAR_HARD_REG_SET (unavailable); ++ if (frame_pointer_needed) ++ { ++ add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM); ++ add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM); ++ } ++ ++ /* Exclude registers with wrong parity. */ ++ mode = GET_MODE (SET_DEST (PATTERN (insn))); ++ for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2) ++ add_to_hard_reg_set (&unavailable, mode, reg); ++ ++ if (!rename_single_chain (this->m_head, &unavailable)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Destination register of insn %d could not be " ++ "renamed. Dependent FMA insns will use this parity from " ++ "there on.\n", INSN_UID (insn)); ++ } ++ else ++ cur_parity = target_parity; ++ } ++ ++ forest->get_globals ()->update_balance (cur_parity); ++} ++ ++/* Wrapper around fma_node::dump_info for use as parameter of function pointer ++ type in func_fma_steering::dfs. 
*/ ++ ++static void ++rename_fma_node (fma_forest *forest, fma_node *node) ++{ ++ node->rename (forest); ++} ++ ++fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain, ++ int id) : fma_node (this, chain) ++{ ++ this->m_forest = new fma_forest (globals, this, id); ++ this->m_forest->fma_node_created (this); ++} ++ ++fma_forest * ++fma_root_node::get_forest () ++{ ++ return this->m_forest; ++} ++ ++void ++fma_root_node::set_forest (fma_forest *ref_forest) ++{ ++ this->m_forest = ref_forest; ++} ++ ++/* Dump information about the roots of forest FOREST. */ ++ ++void ++fma_root_node::dump_info (fma_forest *forest) ++{ ++ gcc_assert (dump_file); ++ ++ if (this == forest->get_roots ()->front ()) ++ fprintf (dump_file, "Instruction(s) at root of forest #%d:", ++ forest->get_id ()); ++ fprintf (dump_file, " %d", INSN_UID (this->m_insn)); ++ if (this == forest->get_roots ()->back ()) ++ fprintf (dump_file, "\n"); ++} ++ ++/* Wrapper around fma_root_node::dump_info for use as parameter of function ++ pointer type in func_fma_steering::dfs. */ ++ ++static void ++dump_tree_root_info (fma_forest *forest, fma_root_node *node) ++{ ++ node->dump_info (forest); ++} ++ ++func_fma_steering::func_fma_steering () : m_fpu_balance (0) ++{ ++ this->m_insn_fma_head_map = new hash_map; ++ this->m_fma_forests.clear (); ++ this->m_next_forest_id = 0; ++} ++ ++func_fma_steering::~func_fma_steering () ++{ ++ delete this->m_insn_fma_head_map; ++} ++ ++int ++func_fma_steering::get_fpu_balance () ++{ ++ return this->m_fpu_balance; ++} ++ ++void ++func_fma_steering::remove_forest (fma_forest *forest) ++{ ++ this->m_fma_forests.remove (forest); ++} ++ ++/* Memorize the mapping of this instruction to its fma_node object and return ++ whether such a mapping existed. */ ++ ++bool ++func_fma_steering::put_node (fma_node *node) ++{ ++ return this->m_insn_fma_head_map->put (node->get_insn (), node); ++} ++ ++/* Update the current balance considering a node with the given PARITY. */ ++ ++void ++func_fma_steering::update_balance (int parity) ++{ ++ this->m_fpu_balance = parity ? this->m_fpu_balance + 1 ++ : this->m_fpu_balance - 1; ++} ++ ++/* Return whether an fma_node object exists for instruction INSN and, if not, ++ allocate one in *RET. */ ++ ++fma_node * ++func_fma_steering::get_fma_node (rtx_insn *insn) ++{ ++ fma_node **fma_slot; ++ ++ fma_slot = this->m_insn_fma_head_map->get (insn); ++ if (fma_slot) ++ return *fma_slot; ++ return NULL; ++} ++ ++/* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB ++ instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all ++ part of FOREST. For the children, the associated head is left untouched ++ (and thus null) as this function will be called again when considering the ++ chain where they are def. For the parent, the chain is given in HEAD. */ ++ ++void ++func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest, ++ du_chain *chain, du_head_p head) ++{ ++ fma_forest *forest; ++ fma_node *node = this->get_fma_node (chain->insn); ++ ++ /* This is a root node. */ ++ if (!node) ++ { ++ fma_root_node *root_node; ++ ++ root_node = new fma_root_node (this, chain, this->m_next_forest_id++); ++ forest = root_node->get_forest (); ++ node = root_node; ++ ++ /* Until proved otherwise, assume this root is not part of an existing ++ forest and thus add its forest to the list of forests. 
*/ ++ this->m_fma_forests.push_back (forest); ++ } ++ else ++ forest = node->get_forest (); ++ ++ node->set_head (head); ++ ++ /* fma_node is part of a chain with several defs, one of them having already ++ been processed. The root of that already processed def is the canonical ++ one and the root of fma_node is added to its forest. No need to process ++ the children nodes as they were already processed when the other def was ++ processed. */ ++ if (ref_forest) ++ { ++ ref_forest->merge_forest (forest); ++ return; ++ } ++ ++ for (chain = head->first; chain; chain = chain->next_use) ++ { ++ fma_node *child_fma; ++ rtx fma_rtx, *accum_rtx_p; ++ ++ if (!is_fmul_fmac_insn (chain->insn, false)) ++ continue; ++ ++ /* Get FMA rtx. */ ++ fma_rtx = SET_SRC (PATTERN (chain->insn)); ++ /* FMA is negated. */ ++ if (GET_CODE (fma_rtx) == NEG) ++ fma_rtx = XEXP (fma_rtx, 0); ++ /* Get accumulator rtx. */ ++ accum_rtx_p = &XEXP (fma_rtx, 2); ++ /* Accumulator is negated. */ ++ if (!REG_P (*accum_rtx_p)) ++ accum_rtx_p = &XEXP (*accum_rtx_p, 0); ++ ++ /* This du_chain structure is not for the accumulator register. */ ++ if (accum_rtx_p != chain->loc) ++ continue; ++ ++ /* If object already created, this is a loop carried dependency. We ++ don't include this object in the children as we want trees for ++ rename_fma_trees to not be an infinite loop. */ ++ if (this->get_fma_node (chain->insn)) ++ continue; ++ ++ child_fma = new fma_node (node, chain); ++ ++ /* Memorize the mapping of this instruction to its fma_node object ++ as it will be processed for the chain starting at its destination ++ register later. */ ++ ++ /* Link to siblings. */ ++ node->add_child (child_fma); ++ } ++} ++ ++/* Perform a depth-first search of the forests of fma_node in ++ THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in ++ THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and ++ PROCESS_NODE () on each node. If FREE is true, free all std::list in the ++ same dfs. */ ++ ++void ++func_fma_steering::dfs (void (*process_forest) (fma_forest *), ++ void (*process_root) (fma_forest *, fma_root_node *), ++ void (*process_node) (fma_forest *, fma_node *), ++ bool free) ++{ ++ vec to_process; ++ std::list::iterator forest_iter; ++ ++ to_process.create (0); ++ ++ /* For each forest. */ ++ for (forest_iter = this->m_fma_forests.begin (); ++ forest_iter != this->m_fma_forests.end (); forest_iter++) ++ { ++ std::list::iterator root_iter; ++ ++ if (process_forest) ++ process_forest (*forest_iter); ++ ++ /* For each tree root in this forest. */ ++ for (root_iter = (*forest_iter)->get_roots ()->begin (); ++ root_iter != (*forest_iter)->get_roots ()->end (); root_iter++) ++ { ++ if (process_root) ++ process_root (*forest_iter, *root_iter); ++ to_process.safe_push (*root_iter); ++ } ++ ++ /* For each tree node in this forest. */ ++ while (!to_process.is_empty ()) ++ { ++ fma_node *node; ++ std::list::iterator child_iter; ++ ++ node = to_process.pop (); ++ ++ if (process_node) ++ process_node (*forest_iter, node); ++ ++ /* Absence of children might indicate an alternate root of a *chain*. ++ It's ok to skip it here as the chain will be renamed when ++ processing the canonical root for that chain. 
*/ ++ if (node->get_children ()->empty ()) ++ continue; ++ ++ for (child_iter = node->get_children ()->begin (); ++ child_iter != node->get_children ()->end (); child_iter++) ++ to_process.safe_push (*child_iter); ++ if (free) ++ { ++ if (node->root_p ()) ++ delete static_cast (node); ++ else ++ delete node; ++ } ++ } ++ if (free) ++ delete *forest_iter; ++ } ++ ++ to_process.release (); ++} ++ ++/* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */ ++ ++void ++func_fma_steering::analyze () ++{ ++ int i, n_blocks, *bb_dfs_preorder; ++ basic_block bb; ++ rtx_insn *insn; ++ ++ bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); ++ n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false); ++ ++ /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB ++ instructions. */ ++ for (i = 0; i < n_blocks; i++) ++ { ++ bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]); ++ FOR_BB_INSNS (bb, insn) ++ { ++ operand_rr_info *dest_op_info; ++ struct du_chain *chain; ++ unsigned dest_regno; ++ fma_forest *forest; ++ du_head_p head; ++ int i; ++ ++ if (!is_fmul_fmac_insn (insn, true)) ++ continue; ++ ++ /* Search the chain where this instruction is (one of) the root. */ ++ dest_op_info = insn_rr[INSN_UID (insn)].op_info; ++ dest_regno = REGNO (SET_DEST (PATTERN (insn))); ++ for (i = 0; i < dest_op_info->n_chains; i++) ++ { ++ /* The register tracked by this chain does not match the ++ destination register of insn. */ ++ if (dest_op_info->heads[i]->regno != dest_regno) ++ continue; ++ ++ head = dest_op_info->heads[i]; ++ /* The chain was merged in another, find the new head. */ ++ if (!head->first) ++ head = regrename_chain_from_id (head->id); ++ ++ /* Search the chain element for this instruction and, if another ++ FMUL or FMADD/FMSUB instruction was already processed, note ++ the forest of its tree. */ ++ forest = NULL; ++ for (chain = head->first; chain; chain = chain->next_use) ++ { ++ fma_node **fma_slot; ++ ++ if (!is_fmul_fmac_insn (chain->insn, true)) ++ continue; ++ ++ /* This is a use, continue. */ ++ if (chain->loc != &SET_DEST (PATTERN (chain->insn))) ++ continue; ++ ++ if (chain->insn == insn) ++ break; ++ ++ fma_slot = this->m_insn_fma_head_map->get (chain->insn); ++ if (fma_slot && (*fma_slot)->get_children ()) ++ forest = (*fma_slot)->get_forest (); ++ } ++ if (chain) ++ break; ++ } ++ ++ /* We didn't find a chain with a def for this instruction. */ ++ gcc_assert (i < dest_op_info->n_chains); ++ ++ this->analyze_fma_fmul_insn (forest, chain, head); ++ } ++ } ++ free (bb_dfs_preorder); ++ ++ if (dump_file) ++ this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info, ++ false); ++} ++ ++/* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with ++ the objective of keeping FPU pipeline balanced in term of instructions and ++ having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be ++ scheduled on the same pipeline. 
*/ ++ ++void ++func_fma_steering::rename_fma_trees () ++{ ++ this->dfs (dispatch_forest, NULL, rename_fma_node, true); ++ ++ if (dump_file && !this->m_fma_forests.empty ()) ++ { ++ fprintf (dump_file, "Function %s has ", current_function_name ()); ++ if (this->m_fpu_balance == 0) ++ fprintf (dump_file, "perfect balance of FMUL/FMA chains between the " ++ "two FPU pipelines\n"); ++ else if (this->m_fpu_balance > 0) ++ fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second " ++ "FPU pipeline\n", this->m_fpu_balance); ++ else /* this->m_fpu_balance < 0 */ ++ fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first " ++ "FPU pipeline\n", - this->m_fpu_balance); ++ } ++} ++ ++/* Execute FMA steering pass. */ ++ ++void ++func_fma_steering::execute_fma_steering () ++{ ++ df_set_flags (DF_LR_RUN_DCE); ++ df_note_add_problem (); ++ df_analyze (); ++ df_set_flags (DF_DEFER_INSN_RESCAN); ++ ++ regrename_init (true); ++ regrename_analyze (NULL); ++ this->analyze (); ++ this->rename_fma_trees (); ++ regrename_finish (); ++} ++ ++const pass_data pass_data_fma_steering = ++{ ++ RTL_PASS, /* type */ ++ "fma_steering", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_NONE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_fma_steering : public rtl_opt_pass ++{ ++public: ++ pass_fma_steering (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_fma_steering, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return (aarch64_tune_params.extra_tuning_flags ++ & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) ++ && optimize >= 2; ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ func_fma_steering *fma_steering = new func_fma_steering; ++ fma_steering->execute_fma_steering (); ++ delete fma_steering; ++ return 0; ++ } ++ ++}; // class pass_fma_steering ++ ++/* Create a new fma steering pass instance. */ ++ ++static rtl_opt_pass * ++make_pass_fma_steering (gcc::context *ctxt) ++{ ++ return new pass_fma_steering (ctxt); ++} ++ ++/* Register the FMA steering pass to the pass manager. */ ++ ++void ++aarch64_register_fma_steering () ++{ ++ opt_pass *pass_fma_steering = make_pass_fma_steering (g); ++ ++ static struct register_pass_info fma_steering_info ++ = { pass_fma_steering, "rnreg", 1, PASS_POS_INSERT_AFTER }; ++ ++ register_pass (&fma_steering_info); ++} +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/cortex-a57-fma-steering.h +@@ -0,0 +1,22 @@ ++/* This file contains declarations for the FMA steering optimization ++ pass for Cortex-A57. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
*/ ++ ++void aarch64_register_fma_steering (void); +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/driver-aarch64.c +@@ -0,0 +1,307 @@ ++/* Native CPU detection for aarch64. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#include "config.h" ++#include "system.h" ++ ++struct arch_extension ++{ ++ const char *ext; ++ const char *feat_string; ++}; ++ ++#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \ ++ { EXT_NAME, FEATURE_STRING }, ++static struct arch_extension ext_to_feat_string[] = ++{ ++#include "aarch64-option-extensions.def" ++}; ++#undef AARCH64_OPT_EXTENSION ++ ++ ++struct aarch64_core_data ++{ ++ const char* name; ++ const char* arch; ++ const char* implementer_id; ++ const char* part_no; ++}; ++ ++#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ ++ { CORE_NAME, #ARCH, IMP, PART }, ++ ++static struct aarch64_core_data cpu_data [] = ++{ ++#include "aarch64-cores.def" ++ { NULL, NULL, NULL, NULL } ++}; ++ ++#undef AARCH64_CORE ++ ++struct aarch64_arch ++{ ++ const char* id; ++ const char* name; ++}; ++ ++#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ ++ { #ARCH, NAME }, ++ ++static struct aarch64_arch aarch64_arches [] = ++{ ++#include "aarch64-arches.def" ++ {NULL, NULL} ++}; ++ ++#undef AARCH64_ARCH ++ ++/* Return the full architecture name string corresponding to the ++ identifier ID. */ ++ ++static const char* ++get_arch_name_from_id (const char* id) ++{ ++ unsigned int i = 0; ++ ++ for (i = 0; aarch64_arches[i].id != NULL; i++) ++ { ++ if (strcmp (id, aarch64_arches[i].id) == 0) ++ return aarch64_arches[i].name; ++ } ++ ++ return NULL; ++} ++ ++ ++/* Check wether the string CORE contains the same CPU part numbers ++ as BL_STRING. For example CORE="{0xd03, 0xd07}" and BL_STRING="0xd07.0xd03" ++ should return true. */ ++ ++static bool ++valid_bL_string_p (const char** core, const char* bL_string) ++{ ++ return strstr (bL_string, core[0]) != NULL ++ && strstr (bL_string, core[1]) != NULL; ++} ++ ++/* Return true iff ARR contains STR in one of its two elements. */ ++ ++static bool ++contains_string_p (const char** arr, const char* str) ++{ ++ bool res = false; ++ ++ if (arr[0] != NULL) ++ { ++ res = strstr (arr[0], str) != NULL; ++ if (res) ++ return res; ++ ++ if (arr[1] != NULL) ++ return strstr (arr[1], str) != NULL; ++ } ++ ++ return false; ++} ++ ++/* This will be called by the spec parser in gcc.c when it sees ++ a %:local_cpu_detect(args) construct. Currently it will be called ++ with either "arch", "cpu" or "tune" as argument depending on if ++ -march=native, -mcpu=native or -mtune=native is to be substituted. ++ ++ It returns a string containing new command line parameters to be ++ put at the place of the above two options, depending on what CPU ++ this is executed. E.g. "-march=armv8-a" on a Cortex-A57 for ++ -march=native. 
If the routine can't detect a known processor, ++ the -march or -mtune option is discarded. ++ ++ For -mtune and -mcpu arguments it attempts to detect the CPU or ++ a big.LITTLE system. ++ ARGC and ARGV are set depending on the actual arguments given ++ in the spec. */ ++ ++const char * ++host_detect_local_cpu (int argc, const char **argv) ++{ ++ const char *arch_id = NULL; ++ const char *res = NULL; ++ static const int num_exts = ARRAY_SIZE (ext_to_feat_string); ++ char buf[128]; ++ FILE *f = NULL; ++ bool arch = false; ++ bool tune = false; ++ bool cpu = false; ++ unsigned int i = 0; ++ unsigned int core_idx = 0; ++ const char* imps[2] = { NULL, NULL }; ++ const char* cores[2] = { NULL, NULL }; ++ unsigned int n_cores = 0; ++ unsigned int n_imps = 0; ++ bool processed_exts = false; ++ const char *ext_string = ""; ++ ++ gcc_assert (argc); ++ ++ if (!argv[0]) ++ goto not_found; ++ ++ /* Are we processing -march, mtune or mcpu? */ ++ arch = strcmp (argv[0], "arch") == 0; ++ if (!arch) ++ tune = strcmp (argv[0], "tune") == 0; ++ ++ if (!arch && !tune) ++ cpu = strcmp (argv[0], "cpu") == 0; ++ ++ if (!arch && !tune && !cpu) ++ goto not_found; ++ ++ f = fopen ("/proc/cpuinfo", "r"); ++ ++ if (f == NULL) ++ goto not_found; ++ ++ /* Look through /proc/cpuinfo to determine the implementer ++ and then the part number that identifies a particular core. */ ++ while (fgets (buf, sizeof (buf), f) != NULL) ++ { ++ if (strstr (buf, "implementer") != NULL) ++ { ++ for (i = 0; cpu_data[i].name != NULL; i++) ++ if (strstr (buf, cpu_data[i].implementer_id) != NULL ++ && !contains_string_p (imps, cpu_data[i].implementer_id)) ++ { ++ if (n_imps == 2) ++ goto not_found; ++ ++ imps[n_imps++] = cpu_data[i].implementer_id; ++ ++ break; ++ } ++ continue; ++ } ++ ++ if (strstr (buf, "part") != NULL) ++ { ++ for (i = 0; cpu_data[i].name != NULL; i++) ++ if (strstr (buf, cpu_data[i].part_no) != NULL ++ && !contains_string_p (cores, cpu_data[i].part_no)) ++ { ++ if (n_cores == 2) ++ goto not_found; ++ ++ cores[n_cores++] = cpu_data[i].part_no; ++ core_idx = i; ++ arch_id = cpu_data[i].arch; ++ break; ++ } ++ continue; ++ } ++ if (!tune && !processed_exts && strstr (buf, "Features") != NULL) ++ { ++ for (i = 0; i < num_exts; i++) ++ { ++ bool enabled = true; ++ char *p = NULL; ++ char *feat_string = concat (ext_to_feat_string[i].feat_string, NULL); ++ ++ p = strtok (feat_string, " "); ++ ++ while (p != NULL) ++ { ++ if (strstr (buf, p) == NULL) ++ { ++ enabled = false; ++ break; ++ } ++ p = strtok (NULL, " "); ++ } ++ ext_string = concat (ext_string, "+", enabled ? "" : "no", ++ ext_to_feat_string[i].ext, NULL); ++ } ++ processed_exts = true; ++ } ++ } ++ ++ fclose (f); ++ f = NULL; ++ ++ /* Weird cpuinfo format that we don't know how to handle. */ ++ if (n_cores == 0 || n_cores > 2 || n_imps != 1) ++ goto not_found; ++ ++ if (arch && !arch_id) ++ goto not_found; ++ ++ if (arch) ++ { ++ const char* arch_name = get_arch_name_from_id (arch_id); ++ ++ /* We got some arch indentifier that's not in aarch64-arches.def? */ ++ if (!arch_name) ++ goto not_found; ++ ++ res = concat ("-march=", arch_name, NULL); ++ } ++ /* We have big.LITTLE. */ ++ else if (n_cores == 2) ++ { ++ for (i = 0; cpu_data[i].name != NULL; i++) ++ { ++ if (strchr (cpu_data[i].part_no, '.') != NULL ++ && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0 ++ && valid_bL_string_p (cores, cpu_data[i].part_no)) ++ { ++ res = concat ("-m", cpu ? 
"cpu" : "tune", "=", cpu_data[i].name, NULL); ++ break; ++ } ++ } ++ if (!res) ++ goto not_found; ++ } ++ /* The simple, non-big.LITTLE case. */ ++ else ++ { ++ if (strncmp (cpu_data[core_idx].implementer_id, imps[0], ++ strlen (imps[0]) - 1) != 0) ++ goto not_found; ++ ++ res = concat ("-m", cpu ? "cpu" : "tune", "=", ++ cpu_data[core_idx].name, NULL); ++ } ++ ++ if (tune) ++ return res; ++ ++ res = concat (res, ext_string, NULL); ++ ++ return res; ++ ++not_found: ++ { ++ /* If detection fails we ignore the option. ++ Clean up and return empty string. */ ++ ++ if (f) ++ fclose (f); ++ ++ return ""; ++ } ++} ++ +--- a/src/gcc/config/aarch64/iterators.md ++++ b/src/gcc/config/aarch64/iterators.md +@@ -537,24 +537,15 @@ + + (define_mode_attr VRL2 [(V8QI "V32QI") (V4HI "V16HI") + (V2SI "V8SI") (V2SF "V8SF") +- (DI "V4DI") (DF "V4DF") +- (V16QI "V32QI") (V8HI "V16HI") +- (V4SI "V8SI") (V4SF "V8SF") +- (V2DI "V4DI") (V2DF "V4DF")]) ++ (DI "V4DI") (DF "V4DF")]) + + (define_mode_attr VRL3 [(V8QI "V48QI") (V4HI "V24HI") + (V2SI "V12SI") (V2SF "V12SF") +- (DI "V6DI") (DF "V6DF") +- (V16QI "V48QI") (V8HI "V24HI") +- (V4SI "V12SI") (V4SF "V12SF") +- (V2DI "V6DI") (V2DF "V6DF")]) ++ (DI "V6DI") (DF "V6DF")]) + + (define_mode_attr VRL4 [(V8QI "V64QI") (V4HI "V32HI") + (V2SI "V16SI") (V2SF "V16SF") +- (DI "V8DI") (DF "V8DF") +- (V16QI "V64QI") (V8HI "V32HI") +- (V4SI "V16SI") (V4SF "V16SF") +- (V2DI "V8DI") (V2DF "V8DF")]) ++ (DI "V8DI") (DF "V8DF")]) + + (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) + +--- a/src/gcc/config/aarch64/t-aarch64 ++++ b/src/gcc/config/aarch64/t-aarch64 +@@ -48,6 +48,16 @@ aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + ++cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \ ++ $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \ ++ dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \ ++ output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \ ++ $(CONTEXT_H) $(TREE_PASS_H) regrename.h \ ++ $(srcdir)/config/aarch64/cortex-a57-fma-steering.h \ ++ $(srcdir)/config/aarch64/aarch64-protos.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/aarch64/cortex-a57-fma-steering.c ++ + comma=, + MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) + MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/x-aarch64 +@@ -0,0 +1,3 @@ ++driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.c \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< +--- a/src/gcc/config/alpha/linux.h ++++ b/src/gcc/config/alpha/linux.h +@@ -61,10 +61,14 @@ along with GCC; see the file COPYING3. 
If not see + #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) + #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) + #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) + #else + #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) + #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) + #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (linux_libc == LIBC_MUSL) + #endif + + /* Determine what functions are present at the runtime; +--- a/src/gcc/config/arm/aarch-common-protos.h ++++ b/src/gcc/config/arm/aarch-common-protos.h +@@ -102,6 +102,8 @@ struct mem_cost_table + const int storef; /* SFmode. */ + const int stored; /* DFmode. */ + const int store_unaligned; /* Extra for unaligned stores. */ ++ const int loadv; /* Vector load. */ ++ const int storev; /* Vector store. */ + }; + + struct fp_cost_table +--- a/src/gcc/config/arm/aarch-cost-tables.h ++++ b/src/gcc/config/arm/aarch-cost-tables.h +@@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs = + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -130,12 +132,12 @@ const struct cpu_cost_table cortexa53_extra_costs = + 0, /* arith. */ + 0, /* logical. */ + COSTS_N_INSNS (1), /* shift. */ +- COSTS_N_INSNS (2), /* shift_reg. */ ++ 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ +- COSTS_N_INSNS (2), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ +- COSTS_N_INSNS (2), /* log_shift_reg. */ +- 0, /* extend. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ +@@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs = + 0, /* storef. */ + 0, /* stored. */ + 0, /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. 
*/ + }, + { + /* FP SFmode */ +--- a/src/gcc/config/arm/arm-arches.def ++++ b/src/gcc/config/arm/arm-arches.def +@@ -44,7 +44,8 @@ ARM_ARCH("armv6", arm1136js, 6, FL_CO_PROC | FL_FOR_ARCH6) + ARM_ARCH("armv6j", arm1136js, 6J, FL_CO_PROC | FL_FOR_ARCH6J) + ARM_ARCH("armv6k", mpcore, 6K, FL_CO_PROC | FL_FOR_ARCH6K) + ARM_ARCH("armv6z", arm1176jzs, 6Z, FL_CO_PROC | FL_FOR_ARCH6Z) +-ARM_ARCH("armv6zk", arm1176jzs, 6ZK, FL_CO_PROC | FL_FOR_ARCH6ZK) ++ARM_ARCH("armv6kz", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ) ++ARM_ARCH("armv6zk", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ) + ARM_ARCH("armv6t2", arm1156t2s, 6T2, FL_CO_PROC | FL_FOR_ARCH6T2) + ARM_ARCH("armv6-m", cortexm1, 6M, FL_FOR_ARCH6M) + ARM_ARCH("armv6s-m", cortexm1, 6M, FL_FOR_ARCH6M) +--- a/src/gcc/config/arm/arm-builtins.c ++++ b/src/gcc/config/arm/arm-builtins.c +@@ -89,7 +89,9 @@ enum arm_type_qualifiers + /* qualifier_const_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86, + /* Polynomial types. */ +- qualifier_poly = 0x100 ++ qualifier_poly = 0x100, ++ /* Lane indices - must be within range of previous argument = a vector. */ ++ qualifier_lane_index = 0x200 + }; + + /* The qualifier_internal allows generation of a unary builtin from +@@ -120,21 +122,40 @@ arm_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + + /* T (T, immediate). */ + static enum arm_type_qualifiers +-arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate }; ++#define BINOP_IMM_QUALIFIERS (arm_binop_imm_qualifiers) ++ ++/* T (T, lane index). */ ++static enum arm_type_qualifiers ++arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_lane_index }; + #define GETLANE_QUALIFIERS (arm_getlane_qualifiers) + + /* T (T, T, T, immediate). */ + static enum arm_type_qualifiers +-arm_lanemac_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++arm_mac_n_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_immediate }; +-#define LANEMAC_QUALIFIERS (arm_lanemac_qualifiers) ++#define MAC_N_QUALIFIERS (arm_mac_n_qualifiers) ++ ++/* T (T, T, T, lane index). */ ++static enum arm_type_qualifiers ++arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_none, ++ qualifier_none, qualifier_lane_index }; ++#define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers) + + /* T (T, T, immediate). */ + static enum arm_type_qualifiers +-arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; ++#define TERNOP_IMM_QUALIFIERS (arm_ternop_imm_qualifiers) ++ ++/* T (T, T, lane index). */ ++static enum arm_type_qualifiers ++arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index }; + #define SETLANE_QUALIFIERS (arm_setlane_qualifiers) + + /* T (T, T). 
*/ +@@ -525,12 +546,16 @@ enum arm_builtins + #undef CRYPTO2 + #undef CRYPTO3 + ++ ARM_BUILTIN_NEON_BASE, ++ ARM_BUILTIN_NEON_LANE_CHECK = ARM_BUILTIN_NEON_BASE, ++ + #include "arm_neon_builtins.def" + + ARM_BUILTIN_MAX + }; + +-#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) ++#define ARM_BUILTIN_NEON_PATTERN_START \ ++ (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) + + #undef CF + #undef VAR1 +@@ -889,7 +914,7 @@ arm_init_simd_builtin_scalar_types (void) + static void + arm_init_neon_builtins (void) + { +- unsigned int i, fcode = ARM_BUILTIN_NEON_BASE; ++ unsigned int i, fcode = ARM_BUILTIN_NEON_PATTERN_START; + + arm_init_simd_builtin_types (); + +@@ -899,6 +924,15 @@ arm_init_neon_builtins (void) + system. */ + arm_init_simd_builtin_scalar_types (); + ++ tree lane_check_fpr = build_function_type_list (void_type_node, ++ intSI_type_node, ++ intSI_type_node, ++ NULL); ++ arm_builtin_decls[ARM_BUILTIN_NEON_LANE_CHECK] = ++ add_builtin_function ("__builtin_arm_lane_check", lane_check_fpr, ++ ARM_BUILTIN_NEON_LANE_CHECK, BUILT_IN_MD, ++ NULL, NULL_TREE); ++ + for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++, fcode++) + { + bool print_type_signature_p = false; +@@ -1939,6 +1973,7 @@ arm_expand_unop_builtin (enum insn_code icode, + typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, ++ NEON_ARG_LANE_INDEX, + NEON_ARG_MEMORY, + NEON_ARG_STOP + } builtin_arg; +@@ -2055,6 +2090,16 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + ++ case NEON_ARG_LANE_INDEX: ++ /* Previous argument must be a vector, which this indexes. */ ++ gcc_assert (argc > 0); ++ if (CONST_INT_P (op[argc])) ++ { ++ enum machine_mode vmode = mode[argc - 1]; ++ neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp); ++ } ++ /* Fall through - if the lane index isn't a constant then ++ the next case will error. */ + case NEON_ARG_CONSTANT: + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) +@@ -2151,14 +2196,31 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, + return target; + } + +-/* Expand a Neon builtin. These are "special" because they don't have symbolic ++/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds. ++ Most of these are "special" because they don't have symbolic + constants defined per-instruction or per instruction-variant. Instead, the + required info is looked up in the table neon_builtin_data. */ + static rtx + arm_expand_neon_builtin (int fcode, tree exp, rtx target) + { ++ if (fcode == ARM_BUILTIN_NEON_LANE_CHECK) ++ { ++ /* Builtin is only to check bounds of the lane passed to some intrinsics ++ that are implemented with gcc vector extensions in arm_neon.h. */ ++ ++ tree nlanes = CALL_EXPR_ARG (exp, 0); ++ gcc_assert (TREE_CODE (nlanes) == INTEGER_CST); ++ rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1)); ++ if (CONST_INT_P (lane_idx)) ++ neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp); ++ else ++ error ("%Klane index must be a constant immediate", exp); ++ /* Don't generate any RTL. 
*/ ++ return const0_rtx; ++ } ++ + neon_builtin_datum *d = +- &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]; ++ &neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START]; + enum insn_code icode = d->code; + builtin_arg args[SIMD_MAX_BUILTIN_ARGS]; + int num_args = insn_data[d->code].n_operands; +@@ -2182,7 +2244,9 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) + int operands_k = k - is_void; + int expr_args_k = k - 1; + +- if (d->qualifiers[qualifiers_k] & qualifier_immediate) ++ if (d->qualifiers[qualifiers_k] & qualifier_lane_index) ++ args[k] = NEON_ARG_LANE_INDEX; ++ else if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = NEON_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) + { +--- a/src/gcc/config/arm/arm-c.c ++++ b/src/gcc/config/arm/arm-c.c +@@ -31,7 +31,11 @@ + #include "wide-int.h" + #include "inchash.h" + #include "tree.h" ++#include "tm_p.h" + #include "c-family/c-common.h" ++#include "target.h" ++#include "target-def.h" ++#include "c-family/c-pragma.h" + + /* Output C specific EABI object attributes. These can not be done in + arm.c because they require information from the C frontend. */ +@@ -51,3 +55,223 @@ arm_lang_object_attributes_init (void) + { + arm_lang_output_object_attributes_hook = arm_output_c_attributes; + } ++ ++#define builtin_define(TXT) cpp_define (pfile, TXT) ++#define builtin_assert(TXT) cpp_assert (pfile, TXT) ++ ++/* Define or undefine macros based on the current target. If the user does ++ #pragma GCC target, we need to adjust the macros dynamically. */ ++ ++static void ++def_or_undef_macro(struct cpp_reader* pfile, const char *name, bool def_p) ++{ ++ if (def_p) ++ cpp_define (pfile, name); ++ else ++ cpp_undef (pfile, name); ++} ++ ++static void ++arm_cpu_builtins (struct cpp_reader* pfile) ++{ ++ def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY); ++ def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT); ++ def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT); ++ if (TARGET_CRYPTO) ++ builtin_define ("__ARM_FEATURE_CRYPTO"); ++ if (unaligned_access) ++ builtin_define ("__ARM_FEATURE_UNALIGNED"); ++ if (TARGET_CRC32) ++ builtin_define ("__ARM_FEATURE_CRC32"); ++ ++ def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT); ++ ++ if (TARGET_ARM_FEATURE_LDREX) ++ builtin_define_with_int_value ("__ARM_FEATURE_LDREX", ++ TARGET_ARM_FEATURE_LDREX); ++ else ++ cpp_undef (pfile, "__ARM_FEATURE_LDREX"); ++ ++ def_or_undef_macro (pfile, "__ARM_FEATURE_CLZ", ++ ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) ++ || TARGET_ARM_ARCH_ISA_THUMB >=2)); ++ ++ def_or_undef_macro (pfile, "__ARM_FEATURE_SIMD32", TARGET_INT_SIMD); ++ ++ builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", ++ flag_short_enums ? 1 : 4); ++ builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", wchar_type_node); ++ if (TARGET_ARM_ARCH_PROFILE) ++ builtin_define_with_int_value ("__ARM_ARCH_PROFILE", ++ TARGET_ARM_ARCH_PROFILE); ++ ++ /* Define __arm__ even when in thumb mode, for ++ consistency with armcc. 
*/ ++ builtin_define ("__arm__"); ++ if (TARGET_ARM_ARCH) ++ builtin_define_with_int_value ("__ARM_ARCH", TARGET_ARM_ARCH); ++ if (arm_arch_notm) ++ builtin_define ("__ARM_ARCH_ISA_ARM"); ++ builtin_define ("__APCS_32__"); ++ ++ def_or_undef_macro (pfile, "__thumb__", TARGET_THUMB); ++ def_or_undef_macro (pfile, "__thumb2__", TARGET_THUMB2); ++ if (TARGET_BIG_END) ++ def_or_undef_macro (pfile, "__THUMBEB__", TARGET_THUMB); ++ else ++ def_or_undef_macro (pfile, "__THUMBEL__", TARGET_THUMB); ++ ++ if (TARGET_ARM_ARCH_ISA_THUMB) ++ builtin_define_with_int_value ("__ARM_ARCH_ISA_THUMB", ++ TARGET_ARM_ARCH_ISA_THUMB); ++ ++ if (TARGET_BIG_END) ++ { ++ builtin_define ("__ARMEB__"); ++ builtin_define ("__ARM_BIG_ENDIAN"); ++ } ++ else ++ { ++ builtin_define ("__ARMEL__"); ++ } ++ ++ if (TARGET_SOFT_FLOAT) ++ builtin_define ("__SOFTFP__"); ++ ++ if (TARGET_VFP) ++ builtin_define ("__VFP_FP__"); ++ ++ if (TARGET_ARM_FP) ++ builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP); ++ if (arm_fp16_format == ARM_FP16_FORMAT_IEEE) ++ builtin_define ("__ARM_FP16_FORMAT_IEEE"); ++ if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) ++ builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); ++ if (TARGET_FMA) ++ builtin_define ("__ARM_FEATURE_FMA"); ++ ++ if (TARGET_NEON) ++ { ++ builtin_define ("__ARM_NEON__"); ++ builtin_define ("__ARM_NEON"); ++ } ++ if (TARGET_NEON_FP) ++ builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP); ++ ++ /* Add a define for interworking. Needed when building libgcc.a. */ ++ if (arm_cpp_interwork) ++ builtin_define ("__THUMB_INTERWORK__"); ++ ++ ++ builtin_define (arm_arch_name); ++ if (arm_arch_xscale) ++ builtin_define ("__XSCALE__"); ++ if (arm_arch_iwmmxt) ++ { ++ builtin_define ("__IWMMXT__"); ++ builtin_define ("__ARM_WMMX"); ++ } ++ if (arm_arch_iwmmxt2) ++ builtin_define ("__IWMMXT2__"); ++ /* ARMv6KZ was originally identified as the misspelled __ARM_ARCH_6ZK__. To ++ preserve the existing behaviour, the misspelled feature macro must still be ++ defined. */ ++ if (arm_arch6kz) ++ builtin_define ("__ARM_ARCH_6ZK__"); ++ if (TARGET_AAPCS_BASED) ++ { ++ if (arm_pcs_default == ARM_PCS_AAPCS_VFP) ++ builtin_define ("__ARM_PCS_VFP"); ++ else if (arm_pcs_default == ARM_PCS_AAPCS) ++ builtin_define ("__ARM_PCS"); ++ builtin_define ("__ARM_EABI__"); ++ } ++ ++ def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV); ++ def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV); ++ ++ def_or_undef_macro (pfile, "__ARM_ASM_SYNTAX_UNIFIED__", inline_asm_unified); ++} ++ ++void ++arm_cpu_cpp_builtins (struct cpp_reader * pfile) ++{ ++ builtin_assert ("cpu=arm"); ++ builtin_assert ("machine=arm"); ++ ++ arm_cpu_builtins (pfile); ++} ++ ++/* Hook to validate the current #pragma GCC target and set the arch custom ++ mode state. If ARGS is NULL, then POP_TARGET is used to reset ++ the options. */ ++static bool ++arm_pragma_target_parse (tree args, tree pop_target) ++{ ++ tree prev_tree = build_target_option_node (&global_options); ++ tree cur_tree; ++ struct cl_target_option *prev_opt; ++ struct cl_target_option *cur_opt; ++ ++ if (! args) ++ { ++ cur_tree = ((pop_target) ? 
pop_target : target_option_default_node); ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (cur_tree)); ++ } ++ else ++ { ++ cur_tree = arm_valid_target_attribute_tree (args, &global_options, ++ &global_options_set); ++ if (cur_tree == NULL_TREE) ++ { ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (prev_tree)); ++ return false; ++ } ++ } ++ ++ target_option_current_node = cur_tree; ++ arm_reset_previous_fndecl (); ++ ++ /* Figure out the previous mode. */ ++ prev_opt = TREE_TARGET_OPTION (prev_tree); ++ cur_opt = TREE_TARGET_OPTION (cur_tree); ++ ++ gcc_assert (prev_opt); ++ gcc_assert (cur_opt); ++ ++ if (cur_opt->x_target_flags != prev_opt->x_target_flags) ++ { ++ /* For the definitions, ensure all newly defined macros are considered ++ as used for -Wunused-macros. There is no point warning about the ++ compiler predefined macros. */ ++ cpp_options *cpp_opts = cpp_get_options (parse_in); ++ unsigned char saved_warn_unused_macros = cpp_opts->warn_unused_macros; ++ cpp_opts->warn_unused_macros = 0; ++ ++ /* Update macros. */ ++ gcc_assert (cur_opt->x_target_flags == target_flags); ++ arm_cpu_builtins (parse_in); ++ ++ cpp_opts->warn_unused_macros = saved_warn_unused_macros; ++ } ++ ++ return true; ++} ++ ++/* Register target pragmas. We need to add the hook for parsing #pragma GCC ++ option here rather than in arm.c since it will pull in various preprocessor ++ functions, and those are not present in languages like fortran without a ++ preprocessor. */ ++ ++void ++arm_register_target_pragmas (void) ++{ ++ /* Update pragma hook to allow parsing #pragma GCC target. */ ++ targetm.target_option.pragma_parse = arm_pragma_target_parse; ++ ++#ifdef REGISTER_SUBTARGET_PRAGMAS ++ REGISTER_SUBTARGET_PRAGMAS (); ++#endif ++} +--- a/src/gcc/config/arm/arm-cores.def ++++ b/src/gcc/config/arm/arm-cores.def +@@ -125,8 +125,8 @@ ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, FL_LDSCHED, 9e) + /* V6 Architecture Processors */ + ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, FL_LDSCHED, 9e) + ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6ZK, FL_LDSCHED, 9e) +-ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) ++ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, FL_LDSCHED, 9e) ++ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, FL_LDSCHED, 9e) + ARM_CORE("mpcore", mpcore, mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, FL_LDSCHED, v6t2) +@@ -158,7 +158,7 @@ ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex + ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7) + ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) + ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) +-ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) ++ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4) + + /* V7 big.LITTLE implementations */ + ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) +--- a/src/gcc/config/arm/arm-protos.h ++++ b/src/gcc/config/arm/arm-protos.h +@@ -30,6 +30,7 @@ extern void arm_load_pic_register (unsigned long); + extern int arm_volatile_func (void); + extern void arm_expand_prologue (void); + extern void 
arm_expand_epilogue (bool); ++extern void arm_declare_function_name (FILE *, const char *, tree); + extern void thumb2_expand_return (bool); + extern const char *arm_strip_name_encoding (const char *); + extern void arm_asm_output_labelref (FILE *, const char *); +@@ -66,10 +67,6 @@ extern rtx legitimize_tls_address (rtx, rtx); + extern bool arm_legitimate_address_p (machine_mode, rtx, bool); + extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int); + extern int thumb_legitimate_offset_p (machine_mode, HOST_WIDE_INT); +-extern bool arm_legitimize_reload_address (rtx *, machine_mode, int, int, +- int); +-extern rtx thumb_legitimize_reload_address (rtx *, machine_mode, int, int, +- int); + extern int thumb1_legitimate_address_p (machine_mode, rtx, int); + extern bool ldm_stm_operation_p (rtx, bool, machine_mode mode, + bool, bool); +@@ -89,7 +86,7 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode, + extern rtx neon_make_constant (rtx); + extern tree arm_builtin_vectorized_function (tree, tree, tree); + extern void neon_expand_vector_init (rtx, rtx); +-extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); ++extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); + extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern HOST_WIDE_INT neon_element_bits (machine_mode); + extern void neon_reinterpret (rtx, rtx); +@@ -185,9 +182,6 @@ extern const char *thumb1_unexpanded_epilogue (void); + extern void thumb1_expand_prologue (void); + extern void thumb1_expand_epilogue (void); + extern const char *thumb1_output_interwork (void); +-#ifdef TREE_CODE +-extern int is_called_in_ARM_mode (tree); +-#endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE + extern enum arm_cond_code maybe_get_arm_condition_code (rtx); +@@ -216,14 +210,15 @@ extern int arm_dllexport_p (tree); + extern int arm_dllimport_p (tree); + extern void arm_mark_dllexport (tree); + extern void arm_mark_dllimport (tree); ++extern bool arm_change_mode_p (tree); + #endif + ++extern tree arm_valid_target_attribute_tree (tree, struct gcc_options *, ++ struct gcc_options *); + extern void arm_pr_long_calls (struct cpp_reader *); + extern void arm_pr_no_long_calls (struct cpp_reader *); + extern void arm_pr_long_calls_off (struct cpp_reader *); + +-extern void arm_lang_object_attributes_init(void); +- + extern const char *arm_mangle_type (const_tree); + extern const char *arm_mangle_builtin_type (const_tree); + +@@ -257,13 +252,6 @@ struct cpu_vec_costs { + + struct cpu_cost_table; + +-enum arm_sched_autopref +- { +- ARM_SCHED_AUTOPREF_OFF, +- ARM_SCHED_AUTOPREF_RANK, +- ARM_SCHED_AUTOPREF_FULL +- }; +- + /* Dump function ARM_PRINT_TUNE_INFO should be updated whenever this + structure is modified. */ + +@@ -272,39 +260,58 @@ struct tune_params + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + const struct cpu_cost_table *insn_extra_cost; + bool (*sched_adjust_cost) (rtx_insn *, rtx, rtx_insn *, int *); ++ int (*branch_cost) (bool, bool); ++ /* Vectorizer costs. */ ++ const struct cpu_vec_costs* vec_costs; + int constant_limit; + /* Maximum number of instructions to conditionalise. */ + int max_insns_skipped; +- int num_prefetch_slots; +- int l1_cache_size; +- int l1_cache_line_size; +- bool prefer_constant_pool; +- int (*branch_cost) (bool, bool); ++ /* Maximum number of instructions to inline calls to memset. */ ++ int max_insns_inline_memset; ++ /* Issue rate of the processor. 
*/ ++ unsigned int issue_rate; ++ /* Explicit prefetch data. */ ++ struct ++ { ++ int num_slots; ++ int l1_cache_size; ++ int l1_cache_line_size; ++ } prefetch; ++ enum {PREF_CONST_POOL_FALSE, PREF_CONST_POOL_TRUE} ++ prefer_constant_pool: 1; + /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ +- bool prefer_ldrd_strd; ++ enum {PREF_LDRD_FALSE, PREF_LDRD_TRUE} prefer_ldrd_strd: 1; + /* The preference for non short cirtcuit operation when optimizing for + performance. The first element covers Thumb state and the second one + is for ARM state. */ +- bool logical_op_non_short_circuit[2]; +- /* Vectorizer costs. */ +- const struct cpu_vec_costs* vec_costs; +- /* Prefer Neon for 64-bit bitops. */ +- bool prefer_neon_for_64bits; ++ enum log_op_non_short_circuit {LOG_OP_NON_SHORT_CIRCUIT_FALSE, ++ LOG_OP_NON_SHORT_CIRCUIT_TRUE}; ++ log_op_non_short_circuit logical_op_non_short_circuit_thumb: 1; ++ log_op_non_short_circuit logical_op_non_short_circuit_arm: 1; + /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ +- bool disparage_flag_setting_t16_encodings; +- /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags +- would be set. */ +- bool disparage_partial_flag_setting_t16_encodings; ++ enum {DISPARAGE_FLAGS_NEITHER, DISPARAGE_FLAGS_PARTIAL, DISPARAGE_FLAGS_ALL} ++ disparage_flag_setting_t16_encodings: 2; ++ enum {PREF_NEON_64_FALSE, PREF_NEON_64_TRUE} prefer_neon_for_64bits: 1; + /* Prefer to inline string operations like memset by using Neon. */ +- bool string_ops_prefer_neon; +- /* Maximum number of instructions to inline calls to memset. */ +- int max_insns_inline_memset; +- /* Bitfield encoding the fuseable pairs of instructions. */ +- unsigned int fuseable_ops; ++ enum {PREF_NEON_STRINGOPS_FALSE, PREF_NEON_STRINGOPS_TRUE} ++ string_ops_prefer_neon: 1; ++ /* Bitfield encoding the fusible pairs of instructions. Use FUSE_OPS ++ in an initializer if multiple fusion operations are supported on a ++ target. */ ++ enum fuse_ops ++ { ++ FUSE_NOTHING = 0, ++ FUSE_MOVW_MOVT = 1 << 0 ++ } fusible_ops: 1; + /* Depth of scheduling queue to check for L2 autoprefetcher. */ +- enum arm_sched_autopref sched_autopref; ++ enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL} ++ sched_autopref: 2; + }; + ++/* Smash multiple fusion operations into a type that can be used for an ++ initializer. */ ++#define FUSE_OPS(x) ((tune_params::fuse_ops) (x)) ++ + extern const struct tune_params *current_tune; + extern int vfp3_const_double_for_fract_bits (rtx); + /* return power of two from operand, otherwise 0. */ +@@ -324,9 +331,16 @@ extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes); + + extern void arm_emit_eabi_attribute (const char *, int, int); + ++extern void arm_reset_previous_fndecl (void); ++ + /* Defined in gcc/common/config/arm-common.c. */ + extern const char *arm_rewrite_selected_cpu (const char *name); + ++/* Defined in gcc/common/config/arm-c.c. */ ++extern void arm_lang_object_attributes_init (void); ++extern void arm_register_target_pragmas (void); ++extern void arm_cpu_cpp_builtins (struct cpp_reader *); ++ + extern bool arm_is_constant_pool_ref (rtx); + + /* Flags used to identify the presence of processor capabilities. */ +@@ -368,6 +382,7 @@ extern bool arm_is_constant_pool_ref (rtx); + + #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ + #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */ ++#define FL_ARCH6KZ (1 << 31) /* ARMv6KZ architecture. 
*/ + + /* Flags that only effect tuning, not available instructions. */ + #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ +@@ -387,7 +402,7 @@ extern bool arm_is_constant_pool_ref (rtx); + #define FL_FOR_ARCH6J FL_FOR_ARCH6 + #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) + #define FL_FOR_ARCH6Z FL_FOR_ARCH6 +-#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K ++#define FL_FOR_ARCH6KZ (FL_FOR_ARCH6K | FL_ARCH6KZ) + #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) + #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) + #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) +@@ -427,6 +442,9 @@ extern int arm_arch6; + /* Nonzero if this chip supports the ARM 6K extensions. */ + extern int arm_arch6k; + ++/* Nonzero if this chip supports the ARM 6KZ extensions. */ ++extern int arm_arch6kz; ++ + /* Nonzero if instructions present in ARMv6-M can be used. */ + extern int arm_arch6m; + +@@ -467,12 +485,6 @@ extern int arm_tune_wbuf; + /* Nonzero if tuning for Cortex-A9. */ + extern int arm_tune_cortex_a9; + +-/* Nonzero if generating Thumb instructions. */ +-extern int thumb_code; +- +-/* Nonzero if generating Thumb-1 instructions. */ +-extern int thumb1_code; +- + /* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around +--- a/src/gcc/config/arm/arm-tables.opt ++++ b/src/gcc/config/arm/arm-tables.opt +@@ -371,46 +371,49 @@ EnumValue + Enum(arm_arch) String(armv6z) Value(13) + + EnumValue +-Enum(arm_arch) String(armv6zk) Value(14) ++Enum(arm_arch) String(armv6kz) Value(14) + + EnumValue +-Enum(arm_arch) String(armv6t2) Value(15) ++Enum(arm_arch) String(armv6zk) Value(15) + + EnumValue +-Enum(arm_arch) String(armv6-m) Value(16) ++Enum(arm_arch) String(armv6t2) Value(16) + + EnumValue +-Enum(arm_arch) String(armv6s-m) Value(17) ++Enum(arm_arch) String(armv6-m) Value(17) + + EnumValue +-Enum(arm_arch) String(armv7) Value(18) ++Enum(arm_arch) String(armv6s-m) Value(18) + + EnumValue +-Enum(arm_arch) String(armv7-a) Value(19) ++Enum(arm_arch) String(armv7) Value(19) + + EnumValue +-Enum(arm_arch) String(armv7ve) Value(20) ++Enum(arm_arch) String(armv7-a) Value(20) + + EnumValue +-Enum(arm_arch) String(armv7-r) Value(21) ++Enum(arm_arch) String(armv7ve) Value(21) + + EnumValue +-Enum(arm_arch) String(armv7-m) Value(22) ++Enum(arm_arch) String(armv7-r) Value(22) + + EnumValue +-Enum(arm_arch) String(armv7e-m) Value(23) ++Enum(arm_arch) String(armv7-m) Value(23) + + EnumValue +-Enum(arm_arch) String(armv8-a) Value(24) ++Enum(arm_arch) String(armv7e-m) Value(24) + + EnumValue +-Enum(arm_arch) String(armv8-a+crc) Value(25) ++Enum(arm_arch) String(armv8-a) Value(25) + + EnumValue +-Enum(arm_arch) String(iwmmxt) Value(26) ++Enum(arm_arch) String(armv8-a+crc) Value(26) + + EnumValue +-Enum(arm_arch) String(iwmmxt2) Value(27) ++Enum(arm_arch) String(iwmmxt) Value(27) ++ ++EnumValue ++Enum(arm_arch) String(iwmmxt2) Value(28) + + Enum + Name(arm_fpu) Type(int) +--- a/src/gcc/config/arm/arm.c ++++ b/src/gcc/config/arm/arm.c +@@ -94,10 +94,12 @@ + #include "opts.h" + #include "dumpfile.h" + #include "gimple-expr.h" ++#include "target-globals.h" + #include "builtins.h" + #include "tm-constrs.h" + #include "rtl-iter.h" + #include "sched-int.h" ++#include "tree.h" + + /* Forward definitions of types. 
*/ + typedef struct minipool_node Mnode; +@@ -121,6 +123,7 @@ static int arm_gen_constant (enum rtx_code, machine_mode, rtx, + static unsigned bit_count (unsigned long); + static int arm_address_register_rtx_p (rtx, int); + static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int); ++static bool is_called_in_ARM_mode (tree); + static int thumb2_legitimate_index_p (machine_mode, rtx, int); + static int thumb1_base_register_rtx_p (rtx, machine_mode, int); + static rtx arm_legitimize_address (rtx, rtx, machine_mode); +@@ -231,6 +234,7 @@ static void arm_encode_section_info (tree, rtx, int); + + static void arm_file_end (void); + static void arm_file_start (void); ++static void arm_insert_attributes (tree, tree *); + + static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode, + tree, int *, int); +@@ -264,6 +268,10 @@ static tree arm_build_builtin_va_list (void); + static void arm_expand_builtin_va_start (tree, rtx); + static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); + static void arm_option_override (void); ++static void arm_option_print (FILE *, int, struct cl_target_option *); ++static void arm_set_current_function (tree); ++static bool arm_can_inline_p (tree, tree); ++static bool arm_valid_target_attribute_p (tree, tree, tree, int); + static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode); + static bool arm_macro_fusion_p (void); + static bool arm_cannot_copy_insn_p (rtx_insn *); +@@ -386,6 +394,9 @@ static const struct attribute_spec arm_attribute_table[] = + #undef TARGET_ATTRIBUTE_TABLE + #define TARGET_ATTRIBUTE_TABLE arm_attribute_table + ++#undef TARGET_INSERT_ATTRIBUTES ++#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes ++ + #undef TARGET_ASM_FILE_START + #define TARGET_ASM_FILE_START arm_file_start + #undef TARGET_ASM_FILE_END +@@ -412,9 +423,15 @@ static const struct attribute_spec arm_attribute_table[] = + #undef TARGET_ASM_FUNCTION_EPILOGUE + #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue + ++#undef TARGET_CAN_INLINE_P ++#define TARGET_CAN_INLINE_P arm_can_inline_p ++ + #undef TARGET_OPTION_OVERRIDE + #define TARGET_OPTION_OVERRIDE arm_option_override + ++#undef TARGET_OPTION_PRINT ++#define TARGET_OPTION_PRINT arm_option_print ++ + #undef TARGET_COMP_TYPE_ATTRIBUTES + #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes + +@@ -430,6 +447,12 @@ static const struct attribute_spec arm_attribute_table[] = + #undef TARGET_SCHED_ADJUST_COST + #define TARGET_SCHED_ADJUST_COST arm_adjust_cost + ++#undef TARGET_SET_CURRENT_FUNCTION ++#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function ++ ++#undef TARGET_OPTION_VALID_ATTRIBUTE_P ++#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p ++ + #undef TARGET_SCHED_REORDER + #define TARGET_SCHED_REORDER arm_sched_reorder + +@@ -806,6 +829,9 @@ int arm_arch6 = 0; + /* Nonzero if this chip supports the ARM 6K extensions. */ + int arm_arch6k = 0; + ++/* Nonzero if this chip supports the ARM 6KZ extensions. */ ++int arm_arch6kz = 0; ++ + /* Nonzero if instructions present in ARMv6-M can be used. */ + int arm_arch6m = 0; + +@@ -846,12 +872,6 @@ int arm_tune_wbuf = 0; + /* Nonzero if tuning for Cortex-A9. */ + int arm_tune_cortex_a9 = 0; + +-/* Nonzero if generating Thumb instructions. */ +-int thumb_code = 0; +- +-/* Nonzero if generating Thumb-1 instructions. */ +-int thumb1_code = 0; +- + /* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. 
+ XXX This is a bit of a hack, it's intended to help work around +@@ -940,11 +960,13 @@ struct processors + }; + + +-#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 +-#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ +- prefetch_slots, \ +- l1_size, \ +- l1_line_size ++#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 } ++#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \ ++ { \ ++ num_slots, \ ++ l1_size, \ ++ l1_line_size \ ++ } + + /* arm generic vectorizer costs. */ + static const +@@ -1027,7 +1049,9 @@ const struct cpu_cost_table cortexa9_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* storef. */ + COSTS_N_INSNS (1), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1128,7 +1152,9 @@ const struct cpu_cost_table cortexa8_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* storef. */ + COSTS_N_INSNS (1), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1230,7 +1256,9 @@ const struct cpu_cost_table cortexa5_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1333,7 +1361,9 @@ const struct cpu_cost_table cortexa7_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1434,7 +1464,9 @@ const struct cpu_cost_table cortexa12_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ +- 0 /* store_unaligned. */ ++ 0, /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1535,7 +1567,9 @@ const struct cpu_cost_table cortexa15_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ +- 0 /* store_unaligned. */ ++ 0, /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1636,7 +1670,9 @@ const struct cpu_cost_table v7m_extra_costs = + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1678,49 +1714,50 @@ const struct cpu_cost_table v7m_extra_costs = + } + }; + +-#define ARM_FUSE_NOTHING (0) +-#define ARM_FUSE_MOVW_MOVT (1 << 0) +- + const struct tune_params arm_slowmul_tune = + { + arm_slowmul_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 3, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. 
*/ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_fastmul_tune = + { + arm_fastmul_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* StrongARM has early execution of branches, so a sequence that is worth +@@ -1729,233 +1766,279 @@ const struct tune_params arm_fastmul_tune = + const struct tune_params arm_strongarm_tune = + { + arm_fastmul_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 3, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. 
*/ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_xscale_tune = + { + arm_xscale_rtx_costs, +- NULL, ++ NULL, /* Insn extra costs. */ + xscale_sched_adjust_cost, ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 2, /* Constant limit. */ + 3, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_9e_tune = + { + arm_9e_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF ++}; ++ ++const struct tune_params arm_marvell_pj4_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ + arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ &arm_default_vec_cost, ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_v6t2_tune = + { + arm_9e_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. 
*/ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + ++ + /* Generic Cortex tuning. Use more specific tunings if appropriate. */ + const struct tune_params arm_cortex_tune = + { + arm_9e_rtx_costs, + &generic_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a8_tune = + { + arm_9e_rtx_costs, + &cortexa8_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. 
*/ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a7_tune = + { + arm_9e_rtx_costs, + &cortexa7_extra_costs, +- NULL, ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a15_tune = + { + arm_9e_rtx_costs, + &cortexa15_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 3, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_FULL + }; + + const struct tune_params arm_cortex_a53_tune = + { + arm_9e_rtx_costs, + &cortexa53_extra_costs, +- NULL, /* Scheduler cost adjustment. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. 
*/ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a57_tune = + { + arm_9e_rtx_costs, + &cortexa57_extra_costs, +- NULL, /* Scheduler cost adjustment. */ +- 1, /* Constant limit. */ +- 2, /* Max cond insns. */ +- ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ ++ NULL, /* Sched adj cost. */ + arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ ++ &arm_default_vec_cost, ++ 1, /* Constant limit. */ ++ 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 3, /* Issue rate. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), ++ tune_params::SCHED_AUTOPREF_FULL + }; + + const struct tune_params arm_xgene1_tune = + { + arm_9e_rtx_costs, + &xgene1_extra_costs, +- NULL, /* Scheduler cost adjustment. */ +- 1, /* Constant limit. */ +- 2, /* Max cond insns. */ +- ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ ++ NULL, /* Sched adj cost. */ + arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 32, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ &arm_default_vec_cost, ++ 1, /* Constant limit. */ ++ 2, /* Max cond insns. */ ++ 32, /* Memset max inline. */ ++ 4, /* Issue rate. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* Branches can be dual-issued on Cortex-A5, so conditional execution is +@@ -1965,21 +2048,23 @@ const struct tune_params arm_cortex_a5_tune = + { + arm_9e_rtx_costs, + &cortexa5_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_cortex_a5_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 1, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_cortex_a5_branch_cost, +- false, /* Prefer LDRD/STRD. 
*/ +- {false, false}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a9_tune = +@@ -1987,41 +2072,45 @@ const struct tune_params arm_cortex_a9_tune = + arm_9e_rtx_costs, + &cortexa9_extra_costs, + cortex_a9_sched_adjust_cost, ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_BENEFICIAL(4,32,32), +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a12_tune = + { + arm_9e_rtx_costs, + &cortexa12_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, /* Vectorizer costs. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single +@@ -2035,21 +2124,23 @@ const struct tune_params arm_v7m_tune = + { + arm_9e_rtx_costs, + &v7m_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. 
*/ ++ arm_cortex_m_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_cortex_m_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {false, false}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* Cortex-M7 tuning. */ +@@ -2058,21 +2149,23 @@ const struct tune_params arm_cortex_m7_tune = + { + arm_9e_rtx_costs, + &v7m_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_cortex_m7_branch_cost, ++ &arm_default_vec_cost, + 0, /* Constant limit. */ + 1, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_cortex_m7_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than +@@ -2080,43 +2173,47 @@ const struct tune_params arm_cortex_m7_tune = + const struct tune_params arm_v6m_tune = + { + arm_9e_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, /* Vectorizer costs. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {false, false}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. 
*/ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, +- NULL, ++ NULL, /* Insn extra costs. */ + fa726te_sched_adjust_cost, ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + +@@ -2626,6 +2723,171 @@ arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + } + ++/* Check any incompatible options that the user has specified. */ ++static void ++arm_option_check_internal (struct gcc_options *opts) ++{ ++ int flags = opts->x_target_flags; ++ ++ /* Make sure that the processor choice does not conflict with any of the ++ other command line choices. */ ++ if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM)) ++ error ("target CPU does not support ARM mode"); ++ ++ /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done ++ from here where no function is being compiled currently. */ ++ if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags)) ++ warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); ++ ++ if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING) ++ warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); ++ ++ /* If this target is normally configured to use APCS frames, warn if they ++ are turned off and debugging is turned on. */ ++ if (TARGET_ARM_P (flags) ++ && write_symbols != NO_DEBUG ++ && !TARGET_APCS_FRAME ++ && (TARGET_DEFAULT & MASK_APCS_FRAME)) ++ warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); ++ ++ /* iWMMXt unsupported under Thumb mode. */ ++ if (TARGET_THUMB_P (flags) && TARGET_IWMMXT) ++ error ("iWMMXt unsupported under Thumb mode"); ++ ++ if (TARGET_HARD_TP && TARGET_THUMB1_P (flags)) ++ error ("can not use -mtp=cp15 with 16-bit Thumb"); ++ ++ if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic) ++ { ++ error ("RTP PIC is incompatible with Thumb"); ++ flag_pic = 0; ++ } ++ ++ /* We only support -mslow-flash-data on armv7-m targets. 
*/ ++ if (target_slow_flash_data ++ && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) ++ || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON))) ++ error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); ++} ++ ++/* Recompute the global settings depending on target attribute options. */ ++ ++static void ++arm_option_params_internal (void) ++{ ++ /* If we are not using the default (ARM mode) section anchor offset ++ ranges, then set the correct ranges now. */ ++ if (TARGET_THUMB1) ++ { ++ /* Thumb-1 LDR instructions cannot have negative offsets. ++ Permissible positive offset ranges are 5-bit (for byte loads), ++ 6-bit (for halfword loads), or 7-bit (for word loads). ++ Empirical results suggest a 7-bit anchor range gives the best ++ overall code size. */ ++ targetm.min_anchor_offset = 0; ++ targetm.max_anchor_offset = 127; ++ } ++ else if (TARGET_THUMB2) ++ { ++ /* The minimum is set such that the total size of the block ++ for a particular anchor is 248 + 1 + 4095 bytes, which is ++ divisible by eight, ensuring natural spacing of anchors. */ ++ targetm.min_anchor_offset = -248; ++ targetm.max_anchor_offset = 4095; ++ } ++ else ++ { ++ targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET; ++ targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET; ++ } ++ ++ if (optimize_size) ++ { ++ /* If optimizing for size, bump the number of instructions that we ++ are prepared to conditionally execute (even on a StrongARM). */ ++ max_insns_skipped = 6; ++ ++ /* For THUMB2, we limit the conditional sequence to one IT block. */ ++ if (TARGET_THUMB2) ++ max_insns_skipped = arm_restrict_it ? 1 : 4; ++ } ++ else ++ /* When -mrestrict-it is in use tone down the if-conversion. */ ++ max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it) ++ ? 1 : current_tune->max_insns_skipped; ++} ++ ++/* True if -mflip-thumb should next add an attribute for the default ++ mode, false if it should next add an attribute for the opposite mode. */ ++static GTY(()) bool thumb_flipper; ++ ++/* Options after initial target override. */ ++static GTY(()) tree init_optimize; ++ ++/* Reset options between modes that the user has specified. */ ++static void ++arm_option_override_internal (struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB)) ++ { ++ warning (0, "target CPU does not support THUMB instructions"); ++ opts->x_target_flags &= ~MASK_THUMB; ++ } ++ ++ if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags)) ++ { ++ /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ ++ opts->x_target_flags &= ~MASK_APCS_FRAME; ++ } ++ ++ /* Callee super interworking implies thumb interworking. Adding ++ this to the flags here simplifies the logic elsewhere. */ ++ if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING) ++ opts->x_target_flags |= MASK_INTERWORK; ++ ++ /* need to remember initial values so combinaisons of options like ++ -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */ ++ cl_optimization *to = TREE_OPTIMIZATION (init_optimize); ++ ++ if (! opts_set->x_arm_restrict_it) ++ opts->x_arm_restrict_it = arm_arch8; ++ ++ if (!TARGET_THUMB2_P (opts->x_target_flags)) ++ opts->x_arm_restrict_it = 0; ++ ++ /* Don't warn since it's on by default in -O2. 
*/ ++ if (TARGET_THUMB1_P (opts->x_target_flags)) ++ opts->x_flag_schedule_insns = 0; ++ else ++ opts->x_flag_schedule_insns = to->x_flag_schedule_insns; ++ ++ /* Disable shrink-wrap when optimizing function for size, since it tends to ++ generate additional returns. */ ++ if (optimize_function_for_size_p (cfun) ++ && TARGET_THUMB2_P (opts->x_target_flags)) ++ opts->x_flag_shrink_wrap = false; ++ else ++ opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap; ++ ++ /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn ++ - epilogue_insns - does not accurately model the corresponding insns ++ emitted in the asm file. In particular, see the comment in thumb_exit ++ 'Find out how many of the (return) argument registers we can corrupt'. ++ As a consequence, the epilogue may clobber registers without fipa-ra ++ finding out about it. Therefore, disable fipa-ra in Thumb1 mode. ++ TODO: Accurately model clobbers for epilogue_insns and reenable ++ fipa-ra. */ ++ if (TARGET_THUMB1_P (opts->x_target_flags)) ++ opts->x_flag_ipa_ra = 0; ++ else ++ opts->x_flag_ipa_ra = to->x_flag_ipa_ra; ++ ++ /* Thumb2 inline assembly code should always use unified syntax. ++ This will apply to ARM and Thumb1 eventually. */ ++ opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags); ++} ++ + /* Fix up any incompatible options that the user has specified. */ + static void + arm_option_override (void) +@@ -2772,10 +3034,9 @@ arm_option_override (void) + tune_flags = arm_selected_tune->flags; + current_tune = arm_selected_tune->tune; + +- /* Make sure that the processor choice does not conflict with any of the +- other command line choices. */ +- if (TARGET_ARM && !(insn_flags & FL_NOTM)) +- error ("target CPU does not support ARM mode"); ++ /* TBD: Dwarf info for apcs frame is not handled yet. */ ++ if (TARGET_APCS_FRAME) ++ flag_shrink_wrap = false; + + /* BPABI targets use linker tricks to allow interworking on cores + without thumb support. */ +@@ -2785,31 +3046,6 @@ arm_option_override (void) + target_flags &= ~MASK_INTERWORK; + } + +- if (TARGET_THUMB && !(insn_flags & FL_THUMB)) +- { +- warning (0, "target CPU does not support THUMB instructions"); +- target_flags &= ~MASK_THUMB; +- } +- +- if (TARGET_APCS_FRAME && TARGET_THUMB) +- { +- /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ +- target_flags &= ~MASK_APCS_FRAME; +- } +- +- /* Callee super interworking implies thumb interworking. Adding +- this to the flags here simplifies the logic elsewhere. */ +- if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) +- target_flags |= MASK_INTERWORK; +- +- /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done +- from here where no function is being compiled currently. */ +- if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) +- warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); +- +- if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) +- warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); +- + if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) + { + warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); +@@ -2825,14 +3061,6 @@ arm_option_override (void) + if (TARGET_APCS_REENT) + warning (0, "APCS reentrant code not supported. Ignored"); + +- /* If this target is normally configured to use APCS frames, warn if they +- are turned off and debugging is turned on. 
*/ +- if (TARGET_ARM +- && write_symbols != NO_DEBUG +- && !TARGET_APCS_FRAME +- && (TARGET_DEFAULT & MASK_APCS_FRAME)) +- warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); +- + if (TARGET_APCS_FLOAT) + warning (0, "passing floating point arguments in fp regs not yet supported"); + +@@ -2844,6 +3072,7 @@ arm_option_override (void) + arm_arch5e = (insn_flags & FL_ARCH5E) != 0; + arm_arch6 = (insn_flags & FL_ARCH6) != 0; + arm_arch6k = (insn_flags & FL_ARCH6K) != 0; ++ arm_arch6kz = arm_arch6k && (insn_flags & FL_ARCH6KZ); + arm_arch_notm = (insn_flags & FL_NOTM) != 0; + arm_arch6m = arm_arch6 && !arm_arch_notm; + arm_arch7 = (insn_flags & FL_ARCH7) != 0; +@@ -2854,8 +3083,6 @@ arm_option_override (void) + + arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; + arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; +- thumb_code = TARGET_ARM == 0; +- thumb1_code = TARGET_THUMB1 != 0; + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; +@@ -2866,32 +3093,6 @@ arm_option_override (void) + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + arm_arch_crc = (insn_flags & FL_CRC32) != 0; + arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0; +- if (arm_restrict_it == 2) +- arm_restrict_it = arm_arch8 && TARGET_THUMB2; +- +- if (!TARGET_THUMB2) +- arm_restrict_it = 0; +- +- /* If we are not using the default (ARM mode) section anchor offset +- ranges, then set the correct ranges now. */ +- if (TARGET_THUMB1) +- { +- /* Thumb-1 LDR instructions cannot have negative offsets. +- Permissible positive offset ranges are 5-bit (for byte loads), +- 6-bit (for halfword loads), or 7-bit (for word loads). +- Empirical results suggest a 7-bit anchor range gives the best +- overall code size. */ +- targetm.min_anchor_offset = 0; +- targetm.max_anchor_offset = 127; +- } +- else if (TARGET_THUMB2) +- { +- /* The minimum is set such that the total size of the block +- for a particular anchor is 248 + 1 + 4095 bytes, which is +- divisible by eight, ensuring natural spacing of anchors. */ +- targetm.min_anchor_offset = -248; +- targetm.max_anchor_offset = 4095; +- } + + /* V5 code we generate is completely interworking capable, so we turn off + TARGET_INTERWORK here to avoid many tests later on. */ +@@ -2951,10 +3152,6 @@ arm_option_override (void) + if (TARGET_IWMMXT && TARGET_NEON) + error ("iWMMXt and NEON are incompatible"); + +- /* iWMMXt unsupported under Thumb mode. */ +- if (TARGET_THUMB && TARGET_IWMMXT) +- error ("iWMMXt unsupported under Thumb mode"); +- + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); +@@ -2999,9 +3196,6 @@ arm_option_override (void) + target_thread_pointer = TP_SOFT; + } + +- if (TARGET_HARD_TP && TARGET_THUMB1) +- error ("can not use -mtp=cp15 with 16-bit Thumb"); +- + /* Override the default structure alignment for AAPCS ABI. */ + if (!global_options_set.x_arm_structure_size_boundary) + { +@@ -3024,12 +3218,6 @@ arm_option_override (void) + } + } + +- if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic) +- { +- error ("RTP PIC is incompatible with Thumb"); +- flag_pic = 0; +- } +- + /* If stack checking is disabled, we can use r10 as the PIC register, + which keeps r9 available. The EABI specifies r9 as the PIC register. 
*/ + if (flag_pic && TARGET_SINGLE_PIC_BASE) +@@ -3097,25 +3285,6 @@ arm_option_override (void) + unaligned_access = 0; + } + +- if (TARGET_THUMB1 && flag_schedule_insns) +- { +- /* Don't warn since it's on by default in -O2. */ +- flag_schedule_insns = 0; +- } +- +- if (optimize_size) +- { +- /* If optimizing for size, bump the number of instructions that we +- are prepared to conditionally execute (even on a StrongARM). */ +- max_insns_skipped = 6; +- +- /* For THUMB2, we limit the conditional sequence to one IT block. */ +- if (TARGET_THUMB2) +- max_insns_skipped = MAX_INSN_PER_IT_BLOCK; +- } +- else +- max_insns_skipped = current_tune->max_insns_skipped; +- + /* Hot/Cold partitioning is not currently supported, since we can't + handle literal pool placement in that case. */ + if (flag_reorder_blocks_and_partition) +@@ -3140,31 +3309,33 @@ arm_option_override (void) + && abi_version_at_least(2)) + flag_strict_volatile_bitfields = 1; + +- /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed +- it beneficial (signified by setting num_prefetch_slots to 1 or more.) */ ++ /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we ++ have deemed it beneficial (signified by setting ++ prefetch.num_slots to 1 or more). */ + if (flag_prefetch_loop_arrays < 0 + && HAVE_prefetch + && optimize >= 3 +- && current_tune->num_prefetch_slots > 0) ++ && current_tune->prefetch.num_slots > 0) + flag_prefetch_loop_arrays = 1; + +- /* Set up parameters to be used in prefetching algorithm. Do not override the +- defaults unless we are tuning for a core we have researched values for. */ +- if (current_tune->num_prefetch_slots > 0) ++ /* Set up parameters to be used in prefetching algorithm. Do not ++ override the defaults unless we are tuning for a core we have ++ researched values for. */ ++ if (current_tune->prefetch.num_slots > 0) + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, +- current_tune->num_prefetch_slots, +- global_options.x_param_values, +- global_options_set.x_param_values); +- if (current_tune->l1_cache_line_size >= 0) ++ current_tune->prefetch.num_slots, ++ global_options.x_param_values, ++ global_options_set.x_param_values); ++ if (current_tune->prefetch.l1_cache_line_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, +- current_tune->l1_cache_line_size, +- global_options.x_param_values, +- global_options_set.x_param_values); +- if (current_tune->l1_cache_size >= 0) ++ current_tune->prefetch.l1_cache_line_size, ++ global_options.x_param_values, ++ global_options_set.x_param_values); ++ if (current_tune->prefetch.l1_cache_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, +- current_tune->l1_cache_size, +- global_options.x_param_values, +- global_options_set.x_param_values); ++ current_tune->prefetch.l1_cache_size, ++ global_options.x_param_values, ++ global_options_set.x_param_values); + + /* Use Neon to perform 64-bits operations rather than core + registers. */ +@@ -3174,67 +3345,63 @@ arm_option_override (void) + + /* Use the alternative scheduling-pressure algorithm by default. */ + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, +- global_options.x_param_values, +- global_options_set.x_param_values); ++ global_options.x_param_values, ++ global_options_set.x_param_values); + + /* Look through ready list and all of queue for instructions + relevant for L2 auto-prefetcher. 
*/ + int param_sched_autopref_queue_depth; +- if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF) +- param_sched_autopref_queue_depth = -1; +- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK) +- param_sched_autopref_queue_depth = 0; +- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL) +- param_sched_autopref_queue_depth = max_insn_queue_index + 1; +- else +- gcc_unreachable (); +- maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, +- param_sched_autopref_queue_depth, +- global_options.x_param_values, +- global_options_set.x_param_values); + +- /* Disable shrink-wrap when optimizing function for size, since it tends to +- generate additional returns. */ +- if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) +- flag_shrink_wrap = false; +- /* TBD: Dwarf info for apcs frame is not handled yet. */ +- if (TARGET_APCS_FRAME) +- flag_shrink_wrap = false; ++ switch (current_tune->sched_autopref) ++ { ++ case tune_params::SCHED_AUTOPREF_OFF: ++ param_sched_autopref_queue_depth = -1; ++ break; ++ ++ case tune_params::SCHED_AUTOPREF_RANK: ++ param_sched_autopref_queue_depth = 0; ++ break; ++ ++ case tune_params::SCHED_AUTOPREF_FULL: ++ param_sched_autopref_queue_depth = max_insn_queue_index + 1; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } + +- /* We only support -mslow-flash-data on armv7-m targets. */ +- if (target_slow_flash_data +- && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) +- || (TARGET_THUMB1 || flag_pic || TARGET_NEON))) +- error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); ++ maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, ++ param_sched_autopref_queue_depth, ++ global_options.x_param_values, ++ global_options_set.x_param_values); + + /* Currently, for slow flash data, we just disable literal pools. */ + if (target_slow_flash_data) + arm_disable_literal_pool = true; + +- /* Thumb2 inline assembly code should always use unified syntax. +- This will apply to ARM and Thumb1 eventually. */ +- if (TARGET_THUMB2) +- inline_asm_unified = 1; +- + /* Disable scheduling fusion by default if it's not armv7 processor + or doesn't prefer ldrd/strd. */ + if (flag_schedule_fusion == 2 + && (!arm_arch7 || !current_tune->prefer_ldrd_strd)) + flag_schedule_fusion = 0; + +- /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn +- - epilogue_insns - does not accurately model the corresponding insns +- emitted in the asm file. In particular, see the comment in thumb_exit +- 'Find out how many of the (return) argument registers we can corrupt'. +- As a consequence, the epilogue may clobber registers without fipa-ra +- finding out about it. Therefore, disable fipa-ra in Thumb1 mode. +- TODO: Accurately model clobbers for epilogue_insns and reenable +- fipa-ra. */ +- if (TARGET_THUMB1) +- flag_ipa_ra = 0; ++ /* Need to remember initial options before they are overriden. */ ++ init_optimize = build_optimization_node (&global_options); ++ ++ arm_option_override_internal (&global_options, &global_options_set); ++ arm_option_check_internal (&global_options); ++ arm_option_params_internal (); + + /* Register global variables with the garbage collector. */ + arm_add_gc_roots (); ++ ++ /* Save the initial options in case the user does function specific ++ options. */ ++ target_option_default_node = target_option_current_node ++ = build_target_option_node (&global_options); ++ ++ /* Init initial mode for testing. 
*/ ++ thumb_flipper = TARGET_THUMB; + } + + static void +@@ -3388,13 +3555,20 @@ arm_warn_func_return (tree decl) + static void + arm_asm_trampoline_template (FILE *f) + { ++ if (TARGET_UNIFIED_ASM) ++ fprintf (f, "\t.syntax unified\n"); ++ else ++ fprintf (f, "\t.syntax divided\n"); ++ + if (TARGET_ARM) + { ++ fprintf (f, "\t.arm\n"); + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); + } + else if (TARGET_THUMB2) + { ++ fprintf (f, "\t.thumb\n"); + /* The Thumb-2 trampoline is similar to the arm implementation. + Unlike 16-bit Thumb, we enter the stub in thumb mode. */ + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", +@@ -7946,236 +8120,6 @@ thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode) + return x; + } + +-bool +-arm_legitimize_reload_address (rtx *p, +- machine_mode mode, +- int opnum, int type, +- int ind_levels ATTRIBUTE_UNUSED) +-{ +- /* We must recognize output that we have already generated ourselves. */ +- if (GET_CODE (*p) == PLUS +- && GET_CODE (XEXP (*p, 0)) == PLUS +- && REG_P (XEXP (XEXP (*p, 0), 0)) +- && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) +- && CONST_INT_P (XEXP (*p, 1))) +- { +- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, +- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), +- VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return true; +- } +- +- if (GET_CODE (*p) == PLUS +- && REG_P (XEXP (*p, 0)) +- && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) +- /* If the base register is equivalent to a constant, let the generic +- code handle it. Otherwise we will run into problems if a future +- reload pass decides to rematerialize the constant. */ +- && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0))) +- && CONST_INT_P (XEXP (*p, 1))) +- { +- HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); +- HOST_WIDE_INT low, high; +- +- /* Detect coprocessor load/stores. */ +- bool coproc_p = ((TARGET_HARD_FLOAT +- && TARGET_VFP +- && (mode == SFmode || mode == DFmode)) +- || (TARGET_REALLY_IWMMXT +- && VALID_IWMMXT_REG_MODE (mode)) +- || (TARGET_NEON +- && (VALID_NEON_DREG_MODE (mode) +- || VALID_NEON_QREG_MODE (mode)))); +- +- /* For some conditions, bail out when lower two bits are unaligned. */ +- if ((val & 0x3) != 0 +- /* Coprocessor load/store indexes are 8-bits + '00' appended. */ +- && (coproc_p +- /* For DI, and DF under soft-float: */ +- || ((mode == DImode || mode == DFmode) +- /* Without ldrd, we use stm/ldm, which does not +- fair well with unaligned bits. */ +- && (! TARGET_LDRD +- /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ +- || TARGET_THUMB2)))) +- return false; +- +- /* When breaking down a [reg+index] reload address into [(reg+high)+low], +- of which the (reg+high) gets turned into a reload add insn, +- we try to decompose the index into high/low values that can often +- also lead to better reload CSE. +- For example: +- ldr r0, [r2, #4100] // Offset too large +- ldr r1, [r2, #4104] // Offset too large +- +- is best reloaded as: +- add t1, r2, #4096 +- ldr r0, [t1, #4] +- add t2, r2, #4096 +- ldr r1, [t2, #8] +- +- which post-reload CSE can simplify in most cases to eliminate the +- second add instruction: +- add t1, r2, #4096 +- ldr r0, [t1, #4] +- ldr r1, [t1, #8] +- +- The idea here is that we want to split out the bits of the constant +- as a mask, rather than as subtracting the maximum offset that the +- respective type of load/store used can handle. 
+- +- When encountering negative offsets, we can still utilize it even if +- the overall offset is positive; sometimes this may lead to an immediate +- that can be constructed with fewer instructions. +- For example: +- ldr r0, [r2, #0x3FFFFC] +- +- This is best reloaded as: +- add t1, r2, #0x400000 +- ldr r0, [t1, #-4] +- +- The trick for spotting this for a load insn with N bits of offset +- (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a +- negative offset that is going to make bit N and all the bits below +- it become zero in the remainder part. +- +- The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect +- to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), +- used in most cases of ARM load/store instructions. */ +- +-#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ +- (((VAL) & ((1 << (N)) - 1)) \ +- ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ +- : 0) +- +- if (coproc_p) +- { +- low = SIGN_MAG_LOW_ADDR_BITS (val, 10); +- +- /* NEON quad-word load/stores are made of two double-word accesses, +- so the valid index range is reduced by 8. Treat as 9-bit range if +- we go over it. */ +- if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) +- low = SIGN_MAG_LOW_ADDR_BITS (val, 9); +- } +- else if (GET_MODE_SIZE (mode) == 8) +- { +- if (TARGET_LDRD) +- low = (TARGET_THUMB2 +- ? SIGN_MAG_LOW_ADDR_BITS (val, 10) +- : SIGN_MAG_LOW_ADDR_BITS (val, 8)); +- else +- /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) +- to access doublewords. The supported load/store offsets are +- -8, -4, and 4, which we try to produce here. */ +- low = ((val & 0xf) ^ 0x8) - 0x8; +- } +- else if (GET_MODE_SIZE (mode) < 8) +- { +- /* NEON element load/stores do not have an offset. */ +- if (TARGET_NEON_FP16 && mode == HFmode) +- return false; +- +- if (TARGET_THUMB2) +- { +- /* Thumb-2 has an asymmetrical index range of (-256,4096). +- Try the wider 12-bit range first, and re-try if the result +- is out of range. */ +- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); +- if (low < -255) +- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); +- } +- else +- { +- if (mode == HImode || mode == HFmode) +- { +- if (arm_arch4) +- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); +- else +- { +- /* The storehi/movhi_bytes fallbacks can use only +- [-4094,+4094] of the full ldrb/strb index range. */ +- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); +- if (low == 4095 || low == -4095) +- return false; +- } +- } +- else +- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); +- } +- } +- else +- return false; +- +- high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) +- ^ (unsigned HOST_WIDE_INT) 0x80000000) +- - (unsigned HOST_WIDE_INT) 0x80000000); +- /* Check for overflow or zero */ +- if (low == 0 || high == 0 || (high + low != val)) +- return false; +- +- /* Reload the high part into a base reg; leave the low part +- in the mem. +- Note that replacing this gen_rtx_PLUS with plus_constant is +- wrong in this case because we rely on the +- (plus (plus reg c1) c2) structure being preserved so that +- XEXP (*p, 0) in push_reload below uses the correct term. 
*/ +- *p = gen_rtx_PLUS (GET_MODE (*p), +- gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), +- GEN_INT (high)), +- GEN_INT (low)); +- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, +- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), +- VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return true; +- } +- +- return false; +-} +- +-rtx +-thumb_legitimize_reload_address (rtx *x_p, +- machine_mode mode, +- int opnum, int type, +- int ind_levels ATTRIBUTE_UNUSED) +-{ +- rtx x = *x_p; +- +- if (GET_CODE (x) == PLUS +- && GET_MODE_SIZE (mode) < 4 +- && REG_P (XEXP (x, 0)) +- && XEXP (x, 0) == stack_pointer_rtx +- && CONST_INT_P (XEXP (x, 1)) +- && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) +- { +- rtx orig_x = x; +- +- x = copy_rtx (x); +- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), +- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return x; +- } +- +- /* If both registers are hi-regs, then it's better to reload the +- entire expression rather than each register individually. That +- only requires one reload register rather than two. */ +- if (GET_CODE (x) == PLUS +- && REG_P (XEXP (x, 0)) +- && REG_P (XEXP (x, 1)) +- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) +- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) +- { +- rtx orig_x = x; +- +- x = copy_rtx (x); +- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), +- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return x; +- } +- +- return NULL; +-} +- + /* Return TRUE if X contains any TLS symbol references. */ + + bool +@@ -9399,7 +9343,8 @@ static bool + arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) + { + const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; +- gcc_assert (GET_CODE (x) == UNSPEC); ++ rtx_code code = GET_CODE (x); ++ gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE); + + switch (XINT (x, 1)) + { +@@ -9445,7 +9390,7 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) + *cost = COSTS_N_INSNS (2); + break; + } +- return false; ++ return true; + } + + /* Cost of a libcall. We assume one insn per argument, an amount for the +@@ -11008,6 +10953,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + *cost = LIBCALL_COST (1); + return false; + ++ case UNSPEC_VOLATILE: + case UNSPEC: + return arm_unspec_cost (x, outer_code, speed_p, cost); + +@@ -12908,12 +12854,12 @@ neon_expand_vector_init (rtx target, rtx vals) + } + + /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise +- ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so +- reported source locations are bogus. */ ++ ERR if it doesn't. EXP indicates the source location, which includes the ++ inlining history for intrinsics. */ + + static void + bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, +- const char *err) ++ const_tree exp, const char *desc) + { + HOST_WIDE_INT lane; + +@@ -12922,15 +12868,22 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, + lane = INTVAL (operand); + + if (lane < low || lane >= high) +- error (err); ++ { ++ if (exp) ++ error ("%K%s %lld out of range %lld - %lld", ++ exp, desc, lane, low, high - 1); ++ else ++ error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1); ++ } + } + + /* Bounds-check lanes. 
*/ + + void +-neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) ++neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, ++ const_tree exp) + { +- bounds_check (operand, low, high, "lane out of range"); ++ bounds_check (operand, low, high, exp, "lane"); + } + + /* Bounds-check constants. */ +@@ -12938,7 +12891,7 @@ neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) + void + neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) + { +- bounds_check (operand, low, high, "constant out of range"); ++ bounds_check (operand, low, high, NULL_TREE, "constant"); + } + + HOST_WIDE_INT +@@ -17287,14 +17240,16 @@ thumb2_reorg (void) + + FOR_EACH_BB_FN (bb, cfun) + { +- if (current_tune->disparage_flag_setting_t16_encodings ++ if ((current_tune->disparage_flag_setting_t16_encodings ++ == tune_params::DISPARAGE_FLAGS_ALL) + && optimize_bb_for_speed_p (bb)) + continue; + + rtx_insn *insn; + Convert_Action action = SKIP; + Convert_Action action_for_partial_flag_setting +- = (current_tune->disparage_partial_flag_setting_t16_encodings ++ = ((current_tune->disparage_flag_setting_t16_encodings ++ != tune_params::DISPARAGE_FLAGS_NEITHER) + && optimize_bb_for_speed_p (bb)) + ? SKIP : CONV; + +@@ -17699,7 +17654,7 @@ arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, + } + + conditional = reverse ? "%?%D0" : "%?%d0"; +- if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM) ++ if ((regno_base == SP_REGNUM) && TARGET_THUMB) + { + /* Output pop (not stmfd) because it has a shorter encoding. */ + gcc_assert (update); +@@ -17998,19 +17953,27 @@ output_mov_long_double_arm_from_arm (rtx *operands) + void + arm_emit_movpair (rtx dest, rtx src) + { ++ rtx insn; ++ + /* If the src is an immediate, simplify it. */ + if (CONST_INT_P (src)) + { + HOST_WIDE_INT val = INTVAL (src); + emit_set_insn (dest, GEN_INT (val & 0x0000ffff)); + if ((val >> 16) & 0x0000ffff) +- emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), +- GEN_INT (16)), +- GEN_INT ((val >> 16) & 0x0000ffff)); ++ { ++ emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), ++ GEN_INT (16)), ++ GEN_INT ((val >> 16) & 0x0000ffff)); ++ insn = get_last_insn (); ++ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); ++ } + return; + } + emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); ++ insn = get_last_insn (); ++ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); + } + + /* Output a move between double words. It must be REG<-MEM +@@ -24077,19 +24040,19 @@ thumb_far_jump_used_p (void) + } + + /* Return nonzero if FUNC must be entered in ARM mode. */ +-int ++static bool + is_called_in_ARM_mode (tree func) + { + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + /* Ignore the problem about functions whose address is taken. */ + if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func)) +- return TRUE; ++ return true; + + #ifdef ARM_PE + return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE; + #else +- return FALSE; ++ return false; + #endif + } + +@@ -24375,6 +24338,24 @@ arm_init_expanders (void) + mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY); + } + ++/* Check that FUNC is called with a different mode. 
*/ ++ ++bool ++arm_change_mode_p (tree func) ++{ ++ if (TREE_CODE (func) != FUNCTION_DECL) ++ return false; ++ ++ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func); ++ ++ if (!callee_tree) ++ callee_tree = target_option_default_node; ++ ++ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); ++ int flags = callee_opts->x_target_flags; ++ ++ return (TARGET_THUMB_P (flags) != TARGET_THUMB); ++} + + /* Like arm_compute_initial_elimination offset. Simpler because there + isn't an ABI specified frame pointer for Thumb. Instead, we set it +@@ -25660,12 +25641,12 @@ arm_print_tune_info (void) + current_tune->constant_limit); + asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n", + current_tune->max_insns_skipped); +- asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n", +- current_tune->num_prefetch_slots); +- asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n", +- current_tune->l1_cache_size); +- asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n", +- current_tune->l1_cache_line_size); ++ asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n", ++ current_tune->prefetch.num_slots); ++ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n", ++ current_tune->prefetch.l1_cache_size); ++ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n", ++ current_tune->prefetch.l1_cache_line_size); + asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n", + (int) current_tune->prefer_constant_pool); + asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n"); +@@ -25681,23 +25662,19 @@ arm_print_tune_info (void) + asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n", + (int) current_tune->prefer_ldrd_strd); + asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n", +- (int) current_tune->logical_op_non_short_circuit[0], +- (int) current_tune->logical_op_non_short_circuit[1]); ++ (int) current_tune->logical_op_non_short_circuit_thumb, ++ (int) current_tune->logical_op_non_short_circuit_arm); + asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n", + (int) current_tune->prefer_neon_for_64bits); + asm_fprintf (asm_out_file, + "\t\t@disparage_flag_setting_t16_encodings:\t%d\n", + (int) current_tune->disparage_flag_setting_t16_encodings); +- asm_fprintf (asm_out_file, +- "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n", +- (int) current_tune +- ->disparage_partial_flag_setting_t16_encodings); + asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n", + (int) current_tune->string_ops_prefer_neon); + asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n", + current_tune->max_insns_inline_memset); +- asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n", +- current_tune->fuseable_ops); ++ asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n", ++ current_tune->fusible_ops); + asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n", + (int) current_tune->sched_autopref); + } +@@ -25707,9 +25684,6 @@ arm_file_start (void) + { + int val; + +- if (TARGET_UNIFIED_ASM) +- asm_fprintf (asm_out_file, "\t.syntax unified\n"); +- + if (TARGET_BPABI) + { + const char *fpu_name; +@@ -26509,7 +26483,7 @@ arm_dbx_register_number (unsigned int regno) + if (IS_IWMMXT_REGNUM (regno)) + return 112 + regno - FIRST_IWMMXT_REGNUM; + +- gcc_unreachable (); ++ return DWARF_FRAME_REGISTERS; + } + + /* Dwarf models VFPv3 registers as 32 64-bit registers. 
+@@ -27213,40 +27187,12 @@ thumb2_output_casesi (rtx *operands) + } + } + +-/* Most ARM cores are single issue, but some newer ones can dual issue. +- The scheduler descriptions rely on this being correct. */ ++/* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the ++ per-core tuning structs. */ + static int + arm_issue_rate (void) + { +- switch (arm_tune) +- { +- case xgene1: +- return 4; +- +- case cortexa15: +- case cortexa57: +- case exynosm1: +- return 3; +- +- case cortexm7: +- case cortexr4: +- case cortexr4f: +- case cortexr5: +- case genericv7a: +- case cortexa5: +- case cortexa7: +- case cortexa8: +- case cortexa9: +- case cortexa12: +- case cortexa17: +- case cortexa53: +- case fa726te: +- case marvell_pj4: +- return 2; +- +- default: +- return 1; +- } ++ return current_tune->issue_rate; + } + + /* Return how many instructions should scheduler lookahead to choose the +@@ -29411,7 +29357,7 @@ arm_gen_setmem (rtx *operands) + static bool + arm_macro_fusion_p (void) + { +- return current_tune->fuseable_ops != ARM_FUSE_NOTHING; ++ return current_tune->fusible_ops != tune_params::FUSE_NOTHING; + } + + +@@ -29432,44 +29378,44 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) + if (!arm_macro_fusion_p ()) + return false; + +- if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT) ++ if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT) + { + /* We are trying to fuse +- movw imm / movt imm +- instructions as a group that gets scheduled together. */ ++ movw imm / movt imm ++ instructions as a group that gets scheduled together. */ + + set_dest = SET_DEST (curr_set); + + if (GET_MODE (set_dest) != SImode) +- return false; ++ return false; + + /* We are trying to match: +- prev (movw) == (set (reg r0) (const_int imm16)) +- curr (movt) == (set (zero_extract (reg r0) +- (const_int 16) +- (const_int 16)) +- (const_int imm16_1)) +- or +- prev (movw) == (set (reg r1) +- (high (symbol_ref ("SYM")))) +- curr (movt) == (set (reg r0) +- (lo_sum (reg r1) +- (symbol_ref ("SYM")))) */ ++ prev (movw) == (set (reg r0) (const_int imm16)) ++ curr (movt) == (set (zero_extract (reg r0) ++ (const_int 16) ++ (const_int 16)) ++ (const_int imm16_1)) ++ or ++ prev (movw) == (set (reg r1) ++ (high (symbol_ref ("SYM")))) ++ curr (movt) == (set (reg r0) ++ (lo_sum (reg r1) ++ (symbol_ref ("SYM")))) */ + if (GET_CODE (set_dest) == ZERO_EXTRACT) +- { +- if (CONST_INT_P (SET_SRC (curr_set)) +- && CONST_INT_P (SET_SRC (prev_set)) +- && REG_P (XEXP (set_dest, 0)) +- && REG_P (SET_DEST (prev_set)) +- && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) +- return true; +- } ++ { ++ if (CONST_INT_P (SET_SRC (curr_set)) ++ && CONST_INT_P (SET_SRC (prev_set)) ++ && REG_P (XEXP (set_dest, 0)) ++ && REG_P (SET_DEST (prev_set)) ++ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) ++ return true; ++ } + else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM +- && REG_P (SET_DEST (curr_set)) +- && REG_P (SET_DEST (prev_set)) +- && GET_CODE (SET_SRC (prev_set)) == HIGH +- && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) +- return true; ++ && REG_P (SET_DEST (curr_set)) ++ && REG_P (SET_DEST (prev_set)) ++ && GET_CODE (SET_SRC (prev_set)) == HIGH ++ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) ++ return true; + } + return false; + } +@@ -29533,6 +29479,273 @@ arm_is_constant_pool_ref (rtx x) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))); + } + ++/* Remember the last target of arm_set_current_function. 
*/ ++static GTY(()) tree arm_previous_fndecl; ++ ++/* Invalidate arm_previous_fndecl. */ ++void ++arm_reset_previous_fndecl (void) ++{ ++ arm_previous_fndecl = NULL_TREE; ++} ++ ++/* Establish appropriate back-end context for processing the function ++ FNDECL. The argument might be NULL to indicate processing at top ++ level, outside of any function scope. */ ++static void ++arm_set_current_function (tree fndecl) ++{ ++ if (!fndecl || fndecl == arm_previous_fndecl) ++ return; ++ ++ tree old_tree = (arm_previous_fndecl ++ ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl) ++ : NULL_TREE); ++ ++ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ ++ arm_previous_fndecl = fndecl; ++ if (old_tree == new_tree) ++ return; ++ ++ if (new_tree && new_tree != target_option_default_node) ++ { ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (new_tree)); ++ ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else ++ TREE_TARGET_GLOBALS (new_tree) ++ = save_target_globals_default_opts (); ++ } ++ ++ else if (old_tree && old_tree != target_option_default_node) ++ { ++ new_tree = target_option_current_node; ++ ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (new_tree)); ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else if (new_tree == target_option_default_node) ++ restore_target_globals (&default_target_globals); ++ else ++ TREE_TARGET_GLOBALS (new_tree) ++ = save_target_globals_default_opts (); ++ } ++ ++ arm_option_params_internal (); ++} ++ ++/* Implement TARGET_OPTION_PRINT. */ + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane) ++static void ++arm_option_print (FILE *file, int indent, struct cl_target_option *ptr) +{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); ++ int flags = ptr->x_target_flags; ++ ++ fprintf (file, "%*sselected arch %s\n", indent, "", ++ TARGET_THUMB2_P (flags) ? "thumb2" : ++ TARGET_THUMB_P (flags) ? "thumb1" : ++ "arm"); +} + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane) ++/* Hook to determine if one function can safely inline another. */ ++ ++static bool ++arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED) +{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); ++ /* Overidde default hook: Always OK to inline between different modes. ++ Function with mode specific instructions, e.g using asm, must be explicitely ++ protected with noinline. */ ++ return true; +} + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane) ++/* Inner function to process the attribute((target(...))), take an argument and ++ set the current options from the argument. If we have a list, recursively ++ go over the list. 
*/ ++ ++static bool ++arm_valid_target_attribute_rec (tree args, struct gcc_options *opts) +{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); ++ if (TREE_CODE (args) == TREE_LIST) ++ { ++ bool ret = true; ++ for (; args; args = TREE_CHAIN (args)) ++ if (TREE_VALUE (args) ++ && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts)) ++ ret = false; ++ return ret; ++ } ++ ++ else if (TREE_CODE (args) != STRING_CST) ++ { ++ error ("attribute % argument not a string"); ++ return false; ++ } ++ ++ char *argstr = ASTRDUP (TREE_STRING_POINTER (args)); ++ while (argstr && *argstr != '\0') ++ { ++ while (ISSPACE (*argstr)) ++ argstr++; ++ ++ if (!strcmp (argstr, "thumb")) ++ { ++ opts->x_target_flags |= MASK_THUMB; ++ arm_option_check_internal (opts); ++ return true; ++ } ++ ++ if (!strcmp (argstr, "arm")) ++ { ++ opts->x_target_flags &= ~MASK_THUMB; ++ arm_option_check_internal (opts); ++ return true; ++ } ++ ++ warning (0, "attribute(target(\"%s\")) is unknown", argstr); ++ return false; ++ } ++ ++ return false; ++} ++ ++/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ ++ ++tree ++arm_valid_target_attribute_tree (tree args, struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ if (!arm_valid_target_attribute_rec (args, opts)) ++ return NULL_TREE; ++ ++ /* Do any overrides, such as global options arch=xxx. */ ++ arm_option_override_internal (opts, opts_set); ++ ++ return build_target_option_node (opts); ++} ++ ++static void ++add_attribute (const char * mode, tree *attributes) ++{ ++ size_t len = strlen (mode); ++ tree value = build_string (len, mode); ++ ++ TREE_TYPE (value) = build_array_type (char_type_node, ++ build_index_type (size_int (len))); ++ ++ *attributes = tree_cons (get_identifier ("target"), ++ build_tree_list (NULL_TREE, value), ++ *attributes); ++} ++ ++/* For testing. Insert thumb or arm modes alternatively on functions. */ ++ ++static void ++arm_insert_attributes (tree fndecl, tree * attributes) ++{ ++ const char *mode; ++ ++ if (! TARGET_FLIP_THUMB) ++ return; ++ ++ if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl) ++ || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl)) ++ return; ++ ++ /* Nested definitions must inherit mode. */ ++ if (current_function_decl) ++ { ++ mode = TARGET_THUMB ? "thumb" : "arm"; ++ add_attribute (mode, attributes); ++ return; ++ } ++ ++ /* If there is already a setting don't change it. */ ++ if (lookup_attribute ("target", *attributes) != NULL) ++ return; ++ ++ mode = thumb_flipper ? "thumb" : "arm"; ++ add_attribute (mode, attributes); ++ ++ thumb_flipper = !thumb_flipper; ++} ++ ++/* Hook to validate attribute((target("string"))). */ ++ ++static bool ++arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name), ++ tree args, int ARG_UNUSED (flags)) ++{ ++ bool ret = true; ++ struct gcc_options func_options; ++ tree cur_tree, new_optimize; ++ gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); ++ ++ /* Get the optimization options of the current function. */ ++ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); ++ ++ /* If the function changed the optimization levels as well as setting target ++ options, start with the optimizations specified. */ ++ if (!func_optimize) ++ func_optimize = optimization_default_node; ++ ++ /* Init func_options. */ ++ memset (&func_options, 0, sizeof (func_options)); ++ init_options_struct (&func_options, NULL); ++ lang_hooks.init_options_struct (&func_options); ++ ++ /* Initialize func_options to the defaults. 
*/ ++ cl_optimization_restore (&func_options, ++ TREE_OPTIMIZATION (func_optimize)); ++ ++ cl_target_option_restore (&func_options, ++ TREE_TARGET_OPTION (target_option_default_node)); ++ ++ /* Set func_options flags with new target mode. */ ++ cur_tree = arm_valid_target_attribute_tree (args, &func_options, ++ &global_options_set); ++ ++ if (cur_tree == NULL_TREE) ++ ret = false; ++ ++ new_optimize = build_optimization_node (&func_options); ++ ++ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree; ++ ++ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; ++ ++ return ret; ++} ++ ++void ++arm_declare_function_name (FILE *stream, const char *name, tree decl) ++{ ++ if (TARGET_UNIFIED_ASM) ++ fprintf (stream, "\t.syntax unified\n"); ++ else ++ fprintf (stream, "\t.syntax divided\n"); ++ ++ if (TARGET_THUMB) ++ { ++ if (is_called_in_ARM_mode (decl) ++ || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY ++ && cfun->is_thunk)) ++ fprintf (stream, "\t.code 32\n"); ++ else if (TARGET_THUMB1) ++ fprintf (stream, "\t.code\t16\n\t.thumb_func\n"); ++ else ++ fprintf (stream, "\t.thumb\n\t.thumb_func\n"); ++ } ++ else ++ fprintf (stream, "\t.arm\n"); ++ ++ if (TARGET_POKE_FUNCTION_NAME) ++ arm_poke_function_name (stream, (const char *) name); +} + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + /* If MEM is in the form of [base+offset], extract the two parts + of address and set to BASE and OFFSET, otherwise return false + after clearing BASE and OFFSET. */ +--- a/src/gcc/config/arm/arm.h ++++ b/src/gcc/config/arm/arm.h +@@ -45,132 +45,7 @@ + extern char arm_arch_name[]; + + /* Target CPU builtins. */ +-#define TARGET_CPU_CPP_BUILTINS() \ +- do \ +- { \ +- if (TARGET_DSP_MULTIPLY) \ +- builtin_define ("__ARM_FEATURE_DSP"); \ +- if (TARGET_ARM_QBIT) \ +- builtin_define ("__ARM_FEATURE_QBIT"); \ +- if (TARGET_ARM_SAT) \ +- builtin_define ("__ARM_FEATURE_SAT"); \ +- if (TARGET_CRYPTO) \ +- builtin_define ("__ARM_FEATURE_CRYPTO"); \ +- if (unaligned_access) \ +- builtin_define ("__ARM_FEATURE_UNALIGNED"); \ +- if (TARGET_CRC32) \ +- builtin_define ("__ARM_FEATURE_CRC32"); \ +- if (TARGET_32BIT) \ +- builtin_define ("__ARM_32BIT_STATE"); \ +- if (TARGET_ARM_FEATURE_LDREX) \ +- builtin_define_with_int_value ( \ +- "__ARM_FEATURE_LDREX", TARGET_ARM_FEATURE_LDREX); \ +- if ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) \ +- || TARGET_ARM_ARCH_ISA_THUMB >=2) \ +- builtin_define ("__ARM_FEATURE_CLZ"); \ +- if (TARGET_INT_SIMD) \ +- builtin_define ("__ARM_FEATURE_SIMD32"); \ +- \ +- builtin_define_with_int_value ( \ +- "__ARM_SIZEOF_MINIMAL_ENUM", \ +- flag_short_enums ? 1 : 4); \ +- builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \ +- wchar_type_node); \ +- if (TARGET_ARM_ARCH_PROFILE) \ +- builtin_define_with_int_value ( \ +- "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ +- \ +- /* Define __arm__ even when in thumb mode, for \ +- consistency with armcc. 
*/ \ +- builtin_define ("__arm__"); \ +- if (TARGET_ARM_ARCH) \ +- builtin_define_with_int_value ( \ +- "__ARM_ARCH", TARGET_ARM_ARCH); \ +- if (arm_arch_notm) \ +- builtin_define ("__ARM_ARCH_ISA_ARM"); \ +- builtin_define ("__APCS_32__"); \ +- if (TARGET_THUMB) \ +- builtin_define ("__thumb__"); \ +- if (TARGET_THUMB2) \ +- builtin_define ("__thumb2__"); \ +- if (TARGET_ARM_ARCH_ISA_THUMB) \ +- builtin_define_with_int_value ( \ +- "__ARM_ARCH_ISA_THUMB", \ +- TARGET_ARM_ARCH_ISA_THUMB); \ +- \ +- if (TARGET_BIG_END) \ +- { \ +- builtin_define ("__ARMEB__"); \ +- builtin_define ("__ARM_BIG_ENDIAN"); \ +- if (TARGET_THUMB) \ +- builtin_define ("__THUMBEB__"); \ +- } \ +- else \ +- { \ +- builtin_define ("__ARMEL__"); \ +- if (TARGET_THUMB) \ +- builtin_define ("__THUMBEL__"); \ +- } \ +- \ +- if (TARGET_SOFT_FLOAT) \ +- builtin_define ("__SOFTFP__"); \ +- \ +- if (TARGET_VFP) \ +- builtin_define ("__VFP_FP__"); \ +- \ +- if (TARGET_ARM_FP) \ +- builtin_define_with_int_value ( \ +- "__ARM_FP", TARGET_ARM_FP); \ +- if (arm_fp16_format == ARM_FP16_FORMAT_IEEE) \ +- builtin_define ("__ARM_FP16_FORMAT_IEEE"); \ +- if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) \ +- builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); \ +- if (TARGET_FMA) \ +- builtin_define ("__ARM_FEATURE_FMA"); \ +- \ +- if (TARGET_NEON) \ +- { \ +- builtin_define ("__ARM_NEON__"); \ +- builtin_define ("__ARM_NEON"); \ +- } \ +- if (TARGET_NEON_FP) \ +- builtin_define_with_int_value ( \ +- "__ARM_NEON_FP", TARGET_NEON_FP); \ +- \ +- /* Add a define for interworking. \ +- Needed when building libgcc.a. */ \ +- if (arm_cpp_interwork) \ +- builtin_define ("__THUMB_INTERWORK__"); \ +- \ +- builtin_assert ("cpu=arm"); \ +- builtin_assert ("machine=arm"); \ +- \ +- builtin_define (arm_arch_name); \ +- if (arm_arch_xscale) \ +- builtin_define ("__XSCALE__"); \ +- if (arm_arch_iwmmxt) \ +- { \ +- builtin_define ("__IWMMXT__"); \ +- builtin_define ("__ARM_WMMX"); \ +- } \ +- if (arm_arch_iwmmxt2) \ +- builtin_define ("__IWMMXT2__"); \ +- if (TARGET_AAPCS_BASED) \ +- { \ +- if (arm_pcs_default == ARM_PCS_AAPCS_VFP) \ +- builtin_define ("__ARM_PCS_VFP"); \ +- else if (arm_pcs_default == ARM_PCS_AAPCS) \ +- builtin_define ("__ARM_PCS"); \ +- builtin_define ("__ARM_EABI__"); \ +- } \ +- if (TARGET_IDIV) \ +- { \ +- builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ +- builtin_define ("__ARM_FEATURE_IDIV"); \ +- } \ +- if (inline_asm_unified) \ +- builtin_define ("__ARM_ASM_SYNTAX_UNIFIED__");\ +- } while (0) ++#define TARGET_CPU_CPP_BUILTINS() arm_cpu_cpp_builtins (pfile) + + #include "config/arm/arm-opts.h" + +@@ -252,6 +127,11 @@ extern void (*arm_lang_output_object_attributes_hook)(void); + #define SUBTARGET_CPP_SPEC "" + #endif + ++/* Tree Target Specification. */ ++#define TARGET_ARM_P(flags) (!TARGET_THUMB_P (flags)) ++#define TARGET_THUMB1_P(flags) (TARGET_THUMB_P (flags) && !arm_arch_thumb2) ++#define TARGET_THUMB2_P(flags) (TARGET_THUMB_P (flags) && arm_arch_thumb2) ++ + /* Run-time Target Specification. */ + #define TARGET_SOFT_FLOAT (arm_float_abi == ARM_FLOAT_ABI_SOFT) + /* Use hardware floating point instructions. */ +@@ -367,21 +247,21 @@ extern void (*arm_lang_output_object_attributes_hook)(void); + #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) + + /* Nonzero if this chip supports ldrex and strex */ +-#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) ++#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) + + /* Nonzero if this chip supports ldrex{bh} and strex{bh}. 
*/ +-#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) ++#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) + + /* Nonzero if this chip supports ldrexd and strexd. */ +-#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ +- && arm_arch_notm) ++#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) \ ++ || arm_arch7) && arm_arch_notm) + + /* Nonzero if this chip supports load-acquire and store-release. */ + #define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8) + + /* Nonzero if integer division instructions supported. */ +-#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ +- || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) ++#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ ++ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) + + /* Nonzero if disallow volatile memory access in IT block. */ + #define TARGET_NO_VOLATILE_CE (arm_arch_no_volatile_ce) +@@ -389,6 +269,12 @@ extern void (*arm_lang_output_object_attributes_hook)(void); + /* Should NEON be used for 64-bits bitops. */ + #define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits) + ++/* Should constant I be slplit for OP. */ ++#define DONT_EARLY_SPLIT_CONSTANT(i, op) \ ++ ((optimize >= 2) \ ++ && can_create_pseudo_p () \ ++ && !const_ok_for_op (i, op)) ++ + /* True iff the full BPABI is being used. If TARGET_BPABI is true, + then TARGET_AAPCS_BASED must be true -- but the converse does not + hold. TARGET_BPABI implies the use of the BPABI runtime library, +@@ -473,7 +359,7 @@ enum base_architecture + BASE_ARCH_5TEJ = 5, + BASE_ARCH_6 = 6, + BASE_ARCH_6J = 6, +- BASE_ARCH_6ZK = 6, ++ BASE_ARCH_6KZ = 6, + BASE_ARCH_6K = 6, + BASE_ARCH_6T2 = 6, + BASE_ARCH_6M = 6, +@@ -528,12 +414,6 @@ extern int arm_arch8; + /* Nonzero if this chip can benefit from load scheduling. */ + extern int arm_ld_sched; + +-/* Nonzero if generating Thumb code, either Thumb-1 or Thumb-2. */ +-extern int thumb_code; +- +-/* Nonzero if generating Thumb-1 code. */ +-extern int thumb1_code; +- + /* Nonzero if this chip is a StrongARM. */ + extern int arm_tune_strongarm; + +@@ -1360,46 +1240,6 @@ enum reg_class + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X))) + +-/* Try a machine-dependent way of reloading an illegitimate address +- operand. If we find one, push the reload and jump to WIN. This +- macro is used in only one place: `find_reloads_address' in reload.c. +- +- For the ARM, we wish to handle large displacements off a base +- register by splitting the addend across a MOV and the mem insn. +- This can cut the number of reloads needed. */ +-#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \ +- do \ +- { \ +- if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \ +- goto WIN; \ +- } \ +- while (0) +- +-/* XXX If an HImode FP+large_offset address is converted to an HImode +- SP+large_offset address, then reload won't know how to fix it. It sees +- only that SP isn't valid for HImode, and so reloads the SP into an index +- register, but the resulting address is still invalid because the offset +- is too big. We fix it here instead by reloading the entire address. */ +-/* We could probably achieve better results by defining PROMOTE_MODE to help +- cope with the variances between the Thumb's signed and unsigned byte and +- halfword load instructions. */ +-/* ??? This should be safe for thumb2, but we may be able to do better. 
*/ +-#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ +-do { \ +- rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \ +- if (new_x) \ +- { \ +- X = new_x; \ +- goto WIN; \ +- } \ +-} while (0) +- +-#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ +- if (TARGET_ARM) \ +- ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \ +- else \ +- THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) +- + /* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + ARM regs are UNITS_PER_WORD bits. +@@ -2096,10 +1936,11 @@ enum arm_auto_incmodes + (current_tune->branch_cost (speed_p, predictable_p)) + + /* False if short circuit operation is preferred. */ +-#define LOGICAL_OP_NON_SHORT_CIRCUIT \ +- ((optimize_size) \ +- ? (TARGET_THUMB ? false : true) \ +- : (current_tune->logical_op_non_short_circuit[TARGET_ARM])) ++#define LOGICAL_OP_NON_SHORT_CIRCUIT \ ++ ((optimize_size) \ ++ ? (TARGET_THUMB ? false : true) \ ++ : TARGET_THUMB ? static_cast (current_tune->logical_op_non_short_circuit_thumb) \ ++ : static_cast (current_tune->logical_op_non_short_circuit_arm)) + + + /* Position Independent Code. */ +@@ -2135,7 +1976,8 @@ extern int making_const_table; + c_register_pragma (0, "long_calls", arm_pr_long_calls); \ + c_register_pragma (0, "no_long_calls", arm_pr_no_long_calls); \ + c_register_pragma (0, "long_calls_off", arm_pr_long_calls_off); \ +- arm_lang_object_attributes_init(); \ ++ arm_lang_object_attributes_init(); \ ++ arm_register_target_pragmas(); \ + } while (0) + + /* Condition code information. */ +@@ -2222,23 +2064,7 @@ extern int making_const_table; + ? 1 : 0) + + #define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ +- do \ +- { \ +- if (TARGET_THUMB) \ +- { \ +- if (is_called_in_ARM_mode (DECL) \ +- || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \ +- && cfun->is_thunk)) \ +- fprintf (STREAM, "\t.code 32\n") ; \ +- else if (TARGET_THUMB1) \ +- fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \ +- else \ +- fprintf (STREAM, "\t.thumb\n\t.thumb_func\n") ; \ +- } \ +- if (TARGET_POKE_FUNCTION_NAME) \ +- arm_poke_function_name (STREAM, (const char *) NAME); \ +- } \ +- while (0) ++ arm_declare_function_name ((STREAM), (NAME), (DECL)); + + /* For aliases of functions we use .thumb_set instead. */ + #define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL1, DECL2) \ +@@ -2413,4 +2239,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); + + #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS + #define TARGET_SUPPORTS_WIDE_INT 1 ++ ++/* For switching between functions with different target attributes. */ ++#define SWITCHABLE_TARGET 1 ++ + #endif /* ! GCC_ARM_H */ +--- a/src/gcc/config/arm/arm.md ++++ b/src/gcc/config/arm/arm.md +@@ -69,13 +69,17 @@ + ; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when + ; generating ARM code. This is used to control the length of some insn + ; patterns that share the same RTL in both ARM and Thumb code. +-(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code"))) ++(define_attr "is_thumb" "yes,no" ++ (const (if_then_else (symbol_ref "TARGET_THUMB") ++ (const_string "yes") (const_string "no")))) + + ; IS_ARCH6 is set to 'yes' when we are generating code form ARMv6. + (define_attr "is_arch6" "no,yes" (const (symbol_ref "arm_arch6"))) + + ; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. 
+-(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) ++(define_attr "is_thumb1" "yes,no" ++ (const (if_then_else (symbol_ref "TARGET_THUMB1") ++ (const_string "yes") (const_string "no")))) + + ; We use this attribute to disable alternatives that can produce 32-bit + ; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks +@@ -1164,10 +1168,16 @@ + { + if (TARGET_32BIT) + { +- arm_split_constant (MINUS, SImode, NULL_RTX, +- INTVAL (operands[1]), operands[0], +- operands[2], optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[1]), MINUS)) ++ operands[1] = force_reg (SImode, operands[1]); ++ else ++ { ++ arm_split_constant (MINUS, SImode, NULL_RTX, ++ INTVAL (operands[1]), operands[0], ++ operands[2], ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + else /* TARGET_THUMB1 */ + operands[1] = force_reg (SImode, operands[1]); +@@ -1177,9 +1187,9 @@ + + ; ??? Check Thumb-2 split length + (define_insn_and_split "*arm_subsi3_insn" +- [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r") +- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n") +- (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))] ++ [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r,r,r,rk,r") ++ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,I,r,r,k ,?n") ++ (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r,I,r,r ,r")))] + "TARGET_32BIT" + "@ + sub%?\\t%0, %1, %2 +@@ -2078,14 +2088,19 @@ + operands[1] = convert_to_mode (QImode, operands[1], 1); + emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0], + operands[1])); ++ DONE; + } ++ else if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[2]), AND)) ++ operands[2] = force_reg (SImode, operands[2]); + else +- arm_split_constant (AND, SImode, NULL_RTX, +- INTVAL (operands[2]), operands[0], +- operands[1], +- optimize && can_create_pseudo_p ()); ++ { ++ arm_split_constant (AND, SImode, NULL_RTX, ++ INTVAL (operands[2]), operands[0], ++ operands[1], ++ optimize && can_create_pseudo_p ()); + +- DONE; ++ DONE; ++ } + } + } + else /* TARGET_THUMB1 */ +@@ -2768,6 +2783,55 @@ + (const_string "logic_shift_reg")))] + ) + ++;; Shifted bics pattern used to set up CC status register and not reusing ++;; bics output. Pattern restricts Thumb2 shift operand as bics for Thumb2 ++;; does not support shift by register. ++(define_insn "andsi_not_shiftsi_si_scc_no_reuse" ++ [(set (reg:CC_NOOV CC_REGNUM) ++ (compare:CC_NOOV ++ (and:SI (not:SI (match_operator:SI 0 "shift_operator" ++ [(match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "arm_rhs_operand" "rM")])) ++ (match_operand:SI 3 "s_register_operand" "r")) ++ (const_int 0))) ++ (clobber (match_scratch:SI 4 "=r"))] ++ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" ++ "bic%.%?\\t%4, %3, %1%S0" ++ [(set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "conds" "set") ++ (set_attr "shift" "1") ++ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "logic_shift_imm") ++ (const_string "logic_shift_reg")))] ++) ++ ++;; Same as andsi_not_shiftsi_si_scc_no_reuse, but the bics result is also ++;; getting reused later. 
++(define_insn "andsi_not_shiftsi_si_scc" ++ [(parallel [(set (reg:CC_NOOV CC_REGNUM) ++ (compare:CC_NOOV ++ (and:SI (not:SI (match_operator:SI 0 "shift_operator" ++ [(match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "arm_rhs_operand" "rM")])) ++ (match_operand:SI 3 "s_register_operand" "r")) ++ (const_int 0))) ++ (set (match_operand:SI 4 "s_register_operand" "=r") ++ (and:SI (not:SI (match_op_dup 0 ++ [(match_dup 1) ++ (match_dup 2)])) ++ (match_dup 3)))])] ++ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" ++ "bic%.%?\\t%4, %3, %1%S0" ++ [(set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "conds" "set") ++ (set_attr "shift" "1") ++ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "logic_shift_imm") ++ (const_string "logic_shift_reg")))] ++) ++ + (define_insn "*andsi_notsi_si_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV +@@ -2884,10 +2948,16 @@ + { + if (TARGET_32BIT) + { +- arm_split_constant (IOR, SImode, NULL_RTX, +- INTVAL (operands[2]), operands[0], operands[1], +- optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[2]), IOR)) ++ operands[2] = force_reg (SImode, operands[2]); ++ else ++ { ++ arm_split_constant (IOR, SImode, NULL_RTX, ++ INTVAL (operands[2]), operands[0], ++ operands[1], ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + else /* TARGET_THUMB1 */ + { +@@ -3054,10 +3124,16 @@ + { + if (TARGET_32BIT) + { +- arm_split_constant (XOR, SImode, NULL_RTX, +- INTVAL (operands[2]), operands[0], operands[1], +- optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[2]), XOR)) ++ operands[2] = force_reg (SImode, operands[2]); ++ else ++ { ++ arm_split_constant (XOR, SImode, NULL_RTX, ++ INTVAL (operands[2]), operands[0], ++ operands[1], ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + else /* TARGET_THUMB1 */ + { +@@ -5076,7 +5152,7 @@ + + (define_split + [(set (match_operand:SI 0 "s_register_operand" "") +- (ior_xor:SI (and:SI (ashift:SI ++ (IOR_XOR:SI (and:SI (ashift:SI + (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")) +@@ -5088,7 +5164,7 @@ + == (GET_MODE_MASK (GET_MODE (operands[5])) + & (GET_MODE_MASK (GET_MODE (operands[5])) + << (INTVAL (operands[2])))))" +- [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) ++ [(set (match_dup 0) (IOR_XOR:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (zero_extend:SI (match_dup 5)))] + "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" +@@ -5590,10 +5666,18 @@ + && !(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1])))) + { +- arm_split_constant (SET, SImode, NULL_RTX, +- INTVAL (operands[1]), operands[0], NULL_RTX, +- optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[1]), SET)) ++ { ++ emit_insn (gen_rtx_SET (SImode, operands[0], operands[1])); ++ DONE; ++ } ++ else ++ { ++ arm_split_constant (SET, SImode, NULL_RTX, ++ INTVAL (operands[1]), operands[0], NULL_RTX, ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + } + else /* TARGET_THUMB1... 
*/ +@@ -5667,7 +5751,7 @@ + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") +- (set_attr "type" "mov_imm")] ++ (set_attr "type" "alu_sreg")] + ) + + (define_insn "*arm_movsi_insn" +@@ -6713,7 +6797,7 @@ + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) +- || INTVAL (operands[2]) > 14 ++ || INTVAL (operands[2]) > MAX_LDM_STM_OPS + || INTVAL (operands[2]) < 2 + || !MEM_P (operands[1]) + || !REG_P (operands[0]) +@@ -6738,7 +6822,7 @@ + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) +- || INTVAL (operands[2]) > 14 ++ || INTVAL (operands[2]) > MAX_LDM_STM_OPS + || INTVAL (operands[2]) < 2 + || !REG_P (operands[1]) + || !MEM_P (operands[0]) +@@ -6923,7 +7007,7 @@ + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") +- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) ++ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + + (define_insn "*cmpsi_shiftsi_swp" + [(set (reg:CC_SWP CC_REGNUM) +@@ -6936,7 +7020,7 @@ + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") +- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) ++ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + + (define_insn "*arm_cmpsi_negshiftsi_si" + [(set (reg:CC_Z CC_REGNUM) +@@ -7529,10 +7613,10 @@ + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") +- (const_string "mov_reg") +- (const_string "mov_reg") +- (const_string "mov_reg") +- (const_string "mov_reg")])] ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*movsfcc_soft_insn" +@@ -7755,6 +7839,13 @@ + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "* + { ++ rtx op = operands[0]; ++ ++ /* Switch mode now when possible. */ ++ if (SYMBOL_REF_DECL (op) && !TREE_PUBLIC (SYMBOL_REF_DECL (op)) ++ && arm_arch5 && arm_change_mode_p (SYMBOL_REF_DECL (op))) ++ return NEED_PLT_RELOC ? \"blx%?\\t%a0(PLT)\" : \"blx%?\\t(%a0)\"; ++ + return NEED_PLT_RELOC ? \"bl%?\\t%a0(PLT)\" : \"bl%?\\t%a0\"; + }" + [(set_attr "type" "call")] +@@ -7772,6 +7863,13 @@ + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "* + { ++ rtx op = operands[1]; + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++ /* Switch mode now when possible. */ ++ if (SYMBOL_REF_DECL (op) && !TREE_PUBLIC (SYMBOL_REF_DECL (op)) ++ && arm_arch5 && arm_change_mode_p (SYMBOL_REF_DECL (op))) ++ return NEED_PLT_RELOC ? \"blx%?\\t%a1(PLT)\" : \"blx%?\\t(%a1)\"; ++ + return NEED_PLT_RELOC ? 
\"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\"; + }" + [(set_attr "type" "call")] +@@ -7885,7 +7983,7 @@ + ) + + (define_expand "return" +- [(returns)] ++ [(RETURNS)] + "(TARGET_ARM || (TARGET_THUMB2 + && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL + && !IS_STACKALIGN (arm_current_func_type ()))) +@@ -7923,7 +8021,7 @@ + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) +- (returns) ++ (RETURNS) + (pc)))] + "TARGET_ARM " + "* +@@ -7946,7 +8044,7 @@ + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (pc) +- (returns)))] ++ (RETURNS)))] + "TARGET_ARM " + "* + { +@@ -8280,7 +8378,7 @@ + + (define_insn "*_multsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") +- (shiftable_ops:SI ++ (SHIFTABLE_OPS:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "power_of_two_operand" "")) + (match_operand:SI 1 "s_register_operand" "rk,")))] +@@ -8294,7 +8392,7 @@ + + (define_insn "*_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") +- (shiftable_ops:SI ++ (SHIFTABLE_OPS:SI + (match_operator:SI 2 "shift_nomul_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) +@@ -8690,7 +8788,14 @@ + return \"\"; + " + [(set_attr "conds" "use") +- (set_attr "type" "mov_reg,mov_reg,multiple") ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (if_then_else (match_operand 1 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (const_string "multiple")]) + (set_attr "length" "4,4,8")] + ) + +@@ -9486,8 +9591,8 @@ + (const_string "alu_imm" ) + (const_string "alu_sreg")) + (const_string "alu_imm") +- (const_string "alu_sreg") +- (const_string "alu_sreg")])] ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_plus" +@@ -9524,7 +9629,13 @@ + sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") +- (set_attr "type" "alu_sreg,alu_imm,multiple,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "alu_imm" ) ++ (const_string "alu_sreg")) ++ (const_string "alu_imm") ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_arith_arith" +@@ -9619,7 +9730,11 @@ + %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") +- (set_attr "type" "alu_shift_reg,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "alu_shift_imm" ) ++ (const_string "alu_shift_reg")) ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_arith" +@@ -9680,7 +9795,11 @@ + %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") +- (set_attr "type" "alu_shift_reg,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "alu_shift_imm" ) ++ (const_string "alu_shift_reg")) ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_not" +@@ -9787,7 +9906,12 @@ + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") +- (set_attr "type" "mov_shift_reg,multiple,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else 
(match_operand 3 "const_int_operand" "") ++ (const_string "mov_shift" ) ++ (const_string "mov_shift_reg")) ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_shift" +@@ -9825,7 +9949,12 @@ + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") +- (set_attr "type" "mov_shift_reg,multiple,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "mov_shift" ) ++ (const_string "mov_shift_reg")) ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_shift_shift" +@@ -10906,7 +11035,7 @@ + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") +- (set_attr "type" "mov_imm")] ++ (set_attr "type" "alu_sreg")] + ) + + (define_insn "*arm_rev" +--- a/src/gcc/config/arm/arm.opt ++++ b/src/gcc/config/arm/arm.opt +@@ -122,6 +122,10 @@ Enum(float_abi_type) String(softfp) Value(ARM_FLOAT_ABI_SOFTFP) + EnumValue + Enum(float_abi_type) String(hard) Value(ARM_FLOAT_ABI_HARD) + ++mflip-thumb ++Target Report Var(TARGET_FLIP_THUMB) Undocumented ++Switch ARM/Thumb modes on alternating functions for compiler testing ++ + mfp16-format= + Target RejectNegative Joined Enum(arm_fp16_format_type) Var(arm_fp16_format) Init(ARM_FP16_FORMAT_NONE) + Specify the __fp16 floating-point format +@@ -182,7 +186,7 @@ Target RejectNegative Joined UInteger Var(arm_structure_size_boundary) Init(DEFA + Specify the minimum bit alignment of structures + + mthumb +-Target Report RejectNegative Mask(THUMB) ++Target Report RejectNegative Mask(THUMB) Save + Generate code for Thumb state + + mthumb-interwork +@@ -246,7 +250,7 @@ Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) + Only generate absolute relocations on word sized values. + + mrestrict-it +-Target Report Var(arm_restrict_it) Init(2) ++Target Report Var(arm_restrict_it) Init(2) Save + Generate IT blocks appropriate for ARMv8. + + mold-rtx-costs +@@ -275,5 +279,5 @@ Target Report Var(target_slow_flash_data) Init(0) + Assume loading data from flash is slower than fetching instructions. + + masm-syntax-unified +-Target Report Var(inline_asm_unified) Init(0) ++Target Report Var(inline_asm_unified) Init(0) Save + Assume unified syntax for Thumb inline assembly code. 
+--- a/src/gcc/config/arm/arm_neon_builtins.def ++++ b/src/gcc/config/arm/arm_neon_builtins.def +@@ -67,28 +67,28 @@ VAR8 (BINOP, vqshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (BINOP, vqshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (BINOP, vqrshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (BINOP, vqrshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR3 (GETLANE, vshrn_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vrshrn_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqshrns_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqshrnu_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqrshrns_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqrshrnu_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqshrun_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqrshrun_n, v8hi, v4si, v2di) +-VAR8 (GETLANE, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR3 (GETLANE, vshlls_n, v8qi, v4hi, v2si) +-VAR3 (GETLANE, vshllu_n, v8qi, v4hi, v2si) +-VAR8 (SETLANE, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vshrn_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vrshrn_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqshrns_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqshrnu_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqrshrns_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqrshrnu_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqshrun_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqrshrun_n, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vshlls_n, v8qi, v4hi, v2si) ++VAR3 (BINOP_IMM, vshllu_n, v8qi, v4hi, v2si) ++VAR8 (TERNOP_IMM, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR2 (BINOP, vsub, v2sf, v4sf) + VAR3 (BINOP, vsubls, v8qi, v4hi, v2si) + VAR3 (BINOP, vsublu, v8qi, v4hi, v2si) +@@ -140,8 +140,8 @@ VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si) + VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) + VAR2 (BINOP, vrecps, v2sf, v4sf) + VAR2 (BINOP, vrsqrts, v2sf, v4sf) +-VAR8 (SETLANE, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vsli_n, 
v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) + VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) + VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) +@@ -162,7 +162,7 @@ VAR10 (SETLANE, vset_lane, + VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di) + VAR10 (UNOP, vdup_n, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +-VAR10 (BINOP, vdup_lane, ++VAR10 (GETLANE, vdup_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) + VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) + VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di) +@@ -174,23 +174,23 @@ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) + VAR3 (UNOP, vmovls, v8qi, v4hi, v2si) + VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si) + VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlals_lane, v4hi, v2si) +-VAR2 (LANEMAC, vmlalu_lane, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) +-VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlsls_lane, v4hi, v2si) +-VAR2 (LANEMAC, vmlslu_lane, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) ++VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vqdmlal_lane, v4hi, v2si) ++VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si) + VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR6 (LANEMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlals_n, v4hi, v2si) +-VAR2 (LANEMAC, vmlalu_n, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlal_n, v4hi, v2si) +-VAR6 (LANEMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlsls_n, v4hi, v2si) +-VAR2 (LANEMAC, vmlslu_n, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlsl_n, v4hi, v2si) ++VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_N, vmlals_n, v4hi, v2si) ++VAR2 (MAC_N, vmlalu_n, v4hi, v2si) ++VAR2 (MAC_N, vqdmlal_n, v4hi, v2si) ++VAR6 (MAC_N, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_N, vmlsls_n, v4hi, v2si) ++VAR2 (MAC_N, vmlslu_n, v4hi, v2si) ++VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si) + VAR10 (SETLANE, vext, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) + VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) +--- a/src/gcc/config/arm/cortex-a53.md ++++ b/src/gcc/config/arm/cortex-a53.md +@@ -360,7 +360,7 @@ + ;; Crude Advanced SIMD approximation. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +-(define_insn_reservation "cortex_53_advsimd" 4 ++(define_insn_reservation "cortex_a53_advsimd" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "is_neon_type" "yes")) + "cortex_a53_simd0") +--- a/src/gcc/config/arm/driver-arm.c ++++ b/src/gcc/config/arm/driver-arm.c +@@ -35,6 +35,9 @@ static struct vendor_cpu arm_cpu_table[] = { + {"0xb02", "armv6k", "mpcore"}, + {"0xb36", "armv6j", "arm1136j-s"}, + {"0xb56", "armv6t2", "arm1156t2-s"}, ++ /* armv6kz is the correct spelling for ARMv6KZ but may not be supported in ++ the version of binutils used. 
The incorrect spelling is supported in ++ legacy and current binutils so that is used instead. */ + {"0xb76", "armv6zk", "arm1176jz-s"}, + {"0xc05", "armv7-a", "cortex-a5"}, + {"0xc07", "armv7ve", "cortex-a7"}, +--- a/src/gcc/config/arm/elf.h ++++ b/src/gcc/config/arm/elf.h +@@ -120,7 +120,6 @@ + { "marm", "mlittle-endian", "mfloat-abi=soft", "mno-thumb-interwork", "fno-leading-underscore" } + #endif + +-#define TARGET_ASM_FILE_START_APP_OFF true + #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + + +--- a/src/gcc/config/arm/iterators.md ++++ b/src/gcc/config/arm/iterators.md +@@ -181,39 +181,53 @@ + ;; compare a second time. + (define_code_iterator LTUGEU [ltu geu]) + ++;; The signed gt, ge comparisons ++(define_code_iterator GTGE [gt ge]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++;; The unsigned gt, ge comparisons ++(define_code_iterator GTUGEU [gtu geu]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++;; Comparisons for vc ++(define_code_iterator COMPARISONS [eq gt ge le lt]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + ;; A list of ... +-(define_code_iterator ior_xor [ior xor]) ++(define_code_iterator IOR_XOR [ior xor]) + + ;; Operations on two halves of a quadword vector. +-(define_code_iterator vqh_ops [plus smin smax umin umax]) ++(define_code_iterator VQH_OPS [plus smin smax umin umax]) + + ;; Operations on two halves of a quadword vector, + ;; without unsigned variants (for use with *SFmode pattern). +-(define_code_iterator vqhs_ops [plus smin smax]) ++(define_code_iterator VQHS_OPS [plus smin smax]) + + ;; A list of widening operators + (define_code_iterator SE [sign_extend zero_extend]) + + ;; Right shifts +-(define_code_iterator rshifts [ashiftrt lshiftrt]) ++(define_code_iterator RSHIFTS [ashiftrt lshiftrt]) + + ;; Iterator for integer conversions + (define_code_iterator FIXUORS [fix unsigned_fix]) + + ;; Binary operators whose second operand can be shifted. +-(define_code_iterator shiftable_ops [plus minus ior xor and]) ++(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and]) + +-;; plus and minus are the only shiftable_ops for which Thumb2 allows ++;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows + ;; a stack pointer opoerand. The minus operation is a candidate for an rsub + ;; and hence only plus is supported. + (define_code_attr t2_binop0 + [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) + +-;; The instruction to use when a shiftable_ops has a shift operation as ++;; The instruction to use when a SHIFTABLE_OPS has a shift operation as + ;; its first operand. 
+ (define_code_attr arith_shift_insn + [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) + ++(define_code_attr cmp_op [(eq "eq") (gt "gt") (ge "ge") (lt "lt") (le "le") ++ (gtu "gt") (geu "ge")]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++(define_code_attr cmp_type [(eq "i") (gt "s") (ge "s") (lt "s") (le "s")]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + ;;---------------------------------------------------------------------------- + ;; Int iterators + ;;---------------------------------------------------------------------------- +@@ -221,6 +235,10 @@ + (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM + UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) + ++(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++(define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) + + (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM +@@ -677,6 +695,11 @@ + + ]) + ++(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt") ++ (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") ++ (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") ++ (UNSPEC_VCAGT "gt")]) + - /* vstn */ + (define_int_attr r [ + (UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r") + (UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "") +@@ -774,7 +797,7 @@ + (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) + + ;; Both kinds of return insn. 
+-(define_code_iterator returns [return simple_return]) ++(define_code_iterator RETURNS [return simple_return]) + (define_code_attr return_str [(return "") (simple_return "simple_")]) + (define_code_attr return_simple_p [(return "false") (simple_return "true")]) + (define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") +--- a/src/gcc/config/arm/iwmmxt.md ++++ b/src/gcc/config/arm/iwmmxt.md +@@ -107,8 +107,8 @@ + ) + + (define_insn "*iwmmxt_arm_movdi" +- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv") +- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))] ++ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv") ++ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" +--- a/src/gcc/config/arm/linux-eabi.h ++++ b/src/gcc/config/arm/linux-eabi.h +@@ -77,6 +77,23 @@ + %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ + %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + ++/* For ARM musl currently supports four dynamic linkers: ++ - ld-musl-arm.so.1 - for the EABI-derived soft-float ABI ++ - ld-musl-armhf.so.1 - for the EABI-derived hard-float ABI ++ - ld-musl-armeb.so.1 - for the EABI-derived soft-float ABI, EB ++ - ld-musl-armebhf.so.1 - for the EABI-derived hard-float ABI, EB ++ musl does not support the legacy OABI mode. ++ All the dynamic linkers live in /lib. ++ We default to soft-float, EL. */ ++#undef MUSL_DYNAMIC_LINKER ++#if TARGET_BIG_ENDIAN_DEFAULT ++#define MUSL_DYNAMIC_LINKER_E "%{mlittle-endian:;:eb}" ++#else ++#define MUSL_DYNAMIC_LINKER_E "%{mbig-endian:eb}" ++#endif ++#define MUSL_DYNAMIC_LINKER \ ++ "/lib/ld-musl-arm" MUSL_DYNAMIC_LINKER_E "%{mfloat-abi=hard:hf}.so.1" ++ + /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. */ + #undef LINK_SPEC +@@ -107,6 +124,7 @@ + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + + /* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we +--- a/src/gcc/config/arm/neon.md ++++ b/src/gcc/config/arm/neon.md +@@ -1114,7 +1114,7 @@ + ;; lshrdi3_neon + (define_insn_and_split "di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") +- (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") ++ (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) +@@ -1194,71 +1194,6 @@ + [(set_attr "type" "neon_add_widen")] + ) + +-;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit +-;; shift-count granularity. That's good enough for the middle-end's current +-;; needs. +- +-;; Note that it's not safe to perform such an operation in big-endian mode, +-;; due to element-ordering issues. 
+- +-(define_expand "vec_shr_" +- [(match_operand:VDQ 0 "s_register_operand" "") +- (match_operand:VDQ 1 "s_register_operand" "") +- (match_operand:SI 2 "const_multiple_of_8_operand" "")] +- "TARGET_NEON && !BYTES_BIG_ENDIAN" +-{ +- rtx zero_reg; +- HOST_WIDE_INT num_bits = INTVAL (operands[2]); +- const int width = GET_MODE_BITSIZE (mode); +- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; +- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = +- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; +- +- if (num_bits == width) +- { +- emit_move_insn (operands[0], operands[1]); +- DONE; +- } +- +- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); +- operands[0] = gen_lowpart (bvecmode, operands[0]); +- operands[1] = gen_lowpart (bvecmode, operands[1]); +- +- emit_insn (gen_ext (operands[0], operands[1], zero_reg, +- GEN_INT (num_bits / BITS_PER_UNIT))); +- DONE; +-}) +- +-(define_expand "vec_shl_" +- [(match_operand:VDQ 0 "s_register_operand" "") +- (match_operand:VDQ 1 "s_register_operand" "") +- (match_operand:SI 2 "const_multiple_of_8_operand" "")] +- "TARGET_NEON && !BYTES_BIG_ENDIAN" +-{ +- rtx zero_reg; +- HOST_WIDE_INT num_bits = INTVAL (operands[2]); +- const int width = GET_MODE_BITSIZE (mode); +- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; +- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = +- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; +- +- if (num_bits == 0) +- { +- emit_move_insn (operands[0], CONST0_RTX (mode)); +- DONE; +- } +- +- num_bits = width - num_bits; +- +- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); +- operands[0] = gen_lowpart (bvecmode, operands[0]); +- operands[1] = gen_lowpart (bvecmode, operands[1]); +- +- emit_insn (gen_ext (operands[0], zero_reg, operands[1], +- GEN_INT (num_bits / BITS_PER_UNIT))); +- DONE; +-}) +- + ;; Helpers for quad-word reduction operations + + ; Add (or smin, smax...) the low N/2 elements of the N-element vector +@@ -1267,7 +1202,7 @@ + + (define_insn "quad_halves_v4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vqh_ops:V2SI ++ (VQH_OPS:V2SI + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SI (match_dup 1) +@@ -1280,7 +1215,7 @@ + + (define_insn "quad_halves_v4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vqhs_ops:V2SF ++ (VQHS_OPS:V2SF + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SF (match_dup 1) +@@ -1293,7 +1228,7 @@ - __extension__ static __inline void -@@ -23887,7 +23769,7 @@ vtst_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vtst_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll}; -+ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); - } + (define_insn "quad_halves_v8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "+w") +- (vqh_ops:V4HI ++ (VQH_OPS:V4HI + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) +@@ -1308,7 +1243,7 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -23911,7 +23793,7 @@ vtst_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vtst_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {(__a[0] & __b[0]) ? 
-1ll : 0ll}; -+ return ((__a & __b) != __AARCH64_UINT64_C (0)); - } + (define_insn "quad_halves_v16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "+w") +- (vqh_ops:V8QI ++ (VQH_OPS:V8QI + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) +@@ -2200,134 +2135,140 @@ + [(set_attr "type" "neon_sub_halve_narrow_q")] + ) - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ---- a/src//dev/null -+++ b/src/gcc/config/aarch64/driver-aarch64.c -@@ -0,0 +1,307 @@ -+/* Native CPU detection for aarch64. -+ Copyright (C) 2015 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#include "config.h" -+#include "system.h" -+ -+struct arch_extension -+{ -+ const char *ext; -+ const char *feat_string; -+}; -+ -+#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \ -+ { EXT_NAME, FEATURE_STRING }, -+static struct arch_extension ext_to_feat_string[] = -+{ -+#include "aarch64-option-extensions.def" -+}; -+#undef AARCH64_OPT_EXTENSION -+ -+ -+struct aarch64_core_data -+{ -+ const char* name; -+ const char* arch; -+ const char* implementer_id; -+ const char* part_no; -+}; -+ -+#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ -+ { CORE_NAME, #ARCH, IMP, PART }, -+ -+static struct aarch64_core_data cpu_data [] = -+{ -+#include "aarch64-cores.def" -+ { NULL, NULL, NULL, NULL } -+}; -+ -+#undef AARCH64_CORE -+ -+struct aarch64_arch -+{ -+ const char* id; -+ const char* name; -+}; -+ -+#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ -+ { #ARCH, NAME }, -+ -+static struct aarch64_arch aarch64_arches [] = -+{ -+#include "aarch64-arches.def" -+ {NULL, NULL} -+}; -+ -+#undef AARCH64_ARCH -+ -+/* Return the full architecture name string corresponding to the -+ identifier ID. */ -+ -+static const char* -+get_arch_name_from_id (const char* id) -+{ -+ unsigned int i = 0; -+ -+ for (i = 0; aarch64_arches[i].id != NULL; i++) -+ { -+ if (strcmp (id, aarch64_arches[i].id) == 0) -+ return aarch64_arches[i].name; -+ } -+ -+ return NULL; -+} -+ -+ -+/* Check wether the string CORE contains the same CPU part numbers -+ as BL_STRING. For example CORE="{0xd03, 0xd07}" and BL_STRING="0xd07.0xd03" -+ should return true. */ -+ -+static bool -+valid_bL_string_p (const char** core, const char* bL_string) -+{ -+ return strstr (bL_string, core[0]) != NULL -+ && strstr (bL_string, core[1]) != NULL; -+} -+ -+/* Return true iff ARR contains STR in one of its two elements. 
*/ -+ -+static bool -+contains_string_p (const char** arr, const char* str) -+{ -+ bool res = false; -+ -+ if (arr[0] != NULL) -+ { -+ res = strstr (arr[0], str) != NULL; -+ if (res) -+ return res; -+ -+ if (arr[1] != NULL) -+ return strstr (arr[1], str) != NULL; -+ } -+ -+ return false; +-(define_insn "neon_vceq" +- [(set (match_operand: 0 "s_register_operand" "=w,w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w,w") +- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] +- UNSPEC_VCEQ))] ++;; These may expand to an UNSPEC pattern when a floating point mode is used ++;; without unsafe math optimizations. ++(define_expand "neon_vc" ++ [(match_operand: 0 "s_register_operand" "=w,w") ++ (neg: ++ (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w") ++ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))] + "TARGET_NEON" +- "@ +- vceq.\t%0, %1, %2 +- vceq.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ { ++ /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations ++ are enabled. */ ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT ++ && !flag_unsafe_math_optimizations) ++ { ++ /* We don't just emit a gen_neon_vc_insn_unspec because ++ we define gen_neon_vceq_insn_unspec only for float modes ++ whereas this expander iterates over the integer modes as well, ++ but we will never expand to UNSPECs for the integer comparisons. */ ++ switch (mode) ++ { ++ case V2SFmode: ++ emit_insn (gen_neon_vcv2sf_insn_unspec (operands[0], ++ operands[1], ++ operands[2])); ++ break; ++ case V4SFmode: ++ emit_insn (gen_neon_vcv4sf_insn_unspec (operands[0], ++ operands[1], ++ operands[2])); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ emit_insn (gen_neon_vc_insn (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++ } + ) + +-(define_insn "neon_vcge" ++(define_insn "neon_vc_insn" + [(set (match_operand: 0 "s_register_operand" "=w,w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w,w") +- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] +- UNSPEC_VCGE))] +- "TARGET_NEON" +- "@ +- vcge.\t%0, %1, %2 +- vcge.\t%0, %1, #0" ++ (neg: ++ (COMPARISONS: ++ (match_operand:VDQW 1 "s_register_operand" "w,w") ++ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))] ++ "TARGET_NEON && !(GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT ++ && !flag_unsafe_math_optimizations)" ++ { ++ char pattern[100]; ++ sprintf (pattern, "vc.%s%%#\t%%0," ++ " %%1, %s", ++ GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT ++ ? "f" : "", ++ which_alternative == 0 ++ ? 
"%2" : "#0"); ++ output_asm_insn (pattern, operands); ++ return ""; ++ } + [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") ++ (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ (const_string "neon_compare")))] + ) + +-(define_insn "neon_vcgeu" +- [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQIW 1 "s_register_operand" "w") +- (match_operand:VDQIW 2 "s_register_operand" "w")] +- UNSPEC_VCGEU))] +- "TARGET_NEON" +- "vcge.u%#\t%0, %1, %2" +- [(set_attr "type" "neon_compare")] +-) +- +-(define_insn "neon_vcgt" ++(define_insn "neon_vc_insn_unspec" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w,w") +- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] +- UNSPEC_VCGT))] ++ [(match_operand:VCVTF 1 "s_register_operand" "w,w") ++ (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")] ++ NEON_VCMP))] + "TARGET_NEON" +- "@ +- vcgt.\t%0, %1, %2 +- vcgt.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ { ++ char pattern[100]; ++ sprintf (pattern, "vc.f%%#\t%%0," ++ " %%1, %s", ++ which_alternative == 0 ++ ? "%2" : "#0"); ++ output_asm_insn (pattern, operands); ++ return ""; +} ++ [(set_attr "type" "neon_fp_compare_s")] + ) + +-(define_insn "neon_vcgtu" ++(define_insn "neon_vcu" + [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQIW 1 "s_register_operand" "w") +- (match_operand:VDQIW 2 "s_register_operand" "w")] +- UNSPEC_VCGTU))] ++ (neg: ++ (GTUGEU: ++ (match_operand:VDQIW 1 "s_register_operand" "w") ++ (match_operand:VDQIW 2 "s_register_operand" "w"))))] + "TARGET_NEON" +- "vcgt.u%#\t%0, %1, %2" ++ "vc.u%#\t%0, %1, %2" + [(set_attr "type" "neon_compare")] + ) + +-;; VCLE and VCLT only support comparisons with immediate zero (register +-;; variants are VCGE and VCGT with operands reversed). 
+- +-(define_insn "neon_vcle" +- [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w") +- (match_operand:VDQW 2 "zero_operand" "Dz")] +- UNSPEC_VCLE))] ++(define_expand "neon_vca" ++ [(set (match_operand: 0 "s_register_operand") ++ (neg: ++ (GTGE: ++ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand")) ++ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))] + "TARGET_NEON" +- "vcle.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] +-) +- +-(define_insn "neon_vclt" +- [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w") +- (match_operand:VDQW 2 "zero_operand" "Dz")] +- UNSPEC_VCLT))] +- "TARGET_NEON" +- "vclt.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ { ++ if (flag_unsafe_math_optimizations) ++ emit_insn (gen_neon_vca_insn (operands[0], operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_neon_vca_insn_unspec (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++ } + ) + +-(define_insn "neon_vcage" ++(define_insn "neon_vca_insn" + [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") +- (match_operand:VCVTF 2 "s_register_operand" "w")] +- UNSPEC_VCAGE))] +- "TARGET_NEON" +- "vacge.\t%0, %1, %2" ++ (neg: ++ (GTGE: ++ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")) ++ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))] ++ "TARGET_NEON && flag_unsafe_math_optimizations" ++ "vac.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] + ) + +-(define_insn "neon_vcagt" ++(define_insn "neon_vca_insn_unspec" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w")] +- UNSPEC_VCAGT))] ++ NEON_VACMP))] + "TARGET_NEON" +- "vacgt.\t%0, %1, %2" ++ "vac.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] + ) + +@@ -2722,8 +2663,6 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); +- + if (BYTES_BIG_ENDIAN) + { + /* The intrinsics are defined in terms of a model where the +@@ -2753,8 +2692,6 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); +- + if (BYTES_BIG_ENDIAN) + { + /* The intrinsics are defined in terms of a model where the +@@ -2784,7 +2721,6 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; + }) +@@ -2795,18 +2731,11 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- switch (INTVAL (operands[2])) +- { +- case 0: +- emit_move_insn (operands[0], gen_lowpart (DImode, operands[1])); +- break; +- case 1: +- emit_move_insn (operands[0], gen_highpart (DImode, operands[1])); +- break; +- default: +- neon_lane_bounds (operands[2], 0, 1); +- FAIL; +- } ++ int lane = INTVAL (operands[2]); ++ gcc_assert ((lane ==0) || (lane == 1)); ++ emit_move_insn (operands[0], lane == 0 ++ ? 
gen_lowpart (DImode, operands[1]) ++ : gen_highpart (DImode, operands[1])); + DONE; + }) + +@@ -2818,7 +2747,6 @@ + "TARGET_NEON" + { + unsigned int elt = INTVAL (operands[3]); +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { +@@ -2841,7 +2769,6 @@ + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; + }) +@@ -2923,7 +2850,6 @@ + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + if (BYTES_BIG_ENDIAN) + { + unsigned int elt = INTVAL (operands[2]); +@@ -2944,7 +2870,6 @@ + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; + }) +@@ -2956,7 +2881,6 @@ + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, 1); + emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); + DONE; + }) +@@ -3156,7 +3080,6 @@ + UNSPEC_VMUL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%P0, %P1, %P2[%c3]"; + } + [(set (attr "type") +@@ -3174,7 +3097,6 @@ + UNSPEC_VMUL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%q0, %q1, %P2[%c3]"; + } + [(set (attr "type") +@@ -3192,7 +3114,6 @@ + VMULL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmull.%#\t%q0, %P1, %P2[%c3]"; + } + [(set_attr "type" "neon_mul__scalar_long")] +@@ -3207,7 +3128,6 @@ + UNSPEC_VQDMULL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmull.\t%q0, %P1, %P2[%c3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +@@ -3222,7 +3142,6 @@ + VQDMULH_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmulh.\t%q0, %q1, %P2[%c3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_q")] +@@ -3237,7 +3156,6 @@ + VQDMULH_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmulh.\t%P0, %P1, %P2[%c3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_q")] +@@ -3253,7 +3171,6 @@ + UNSPEC_VMLA_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%P0, %P2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3272,7 +3189,6 @@ + UNSPEC_VMLA_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%q0, %q2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3291,7 +3207,6 @@ + VMLAL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlal.%#\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +@@ -3307,7 +3222,6 @@ + UNSPEC_VQDMLAL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlal.\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +@@ -3323,7 +3237,6 @@ + UNSPEC_VMLS_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%P0, %P2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3342,7 +3255,6 @@ + UNSPEC_VMLS_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%q0, %q2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3361,7 +3273,6 @@ + 
VMLSL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlsl.%#\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +@@ -3377,7 +3288,6 @@ + UNSPEC_VQDMLSL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlsl.\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +--- a/src/gcc/config/arm/sync.md ++++ b/src/gcc/config/arm/sync.md +@@ -50,14 +50,11 @@ + { + if (TARGET_HAVE_DMB) + { +- /* Note we issue a system level barrier. We should consider issuing +- a inner shareabilty zone barrier here instead, ie. "DMB ISH". */ +- /* ??? Differentiate based on SEQ_CST vs less strict? */ +- return "dmb\tsy"; ++ return "dmb\\tish"; + } + + if (TARGET_HAVE_DMB_MCR) +- return "mcr\tp15, 0, r0, c7, c10, 5"; ++ return "mcr\\tp15, 0, r0, c7, c10, 5"; + + gcc_unreachable (); + } +--- a/src/gcc/config/arm/thumb2.md ++++ b/src/gcc/config/arm/thumb2.md +@@ -300,7 +300,7 @@ + ldr%?\\t%0, %1 + str%?\\t%1, %0 + str%?\\t%1, %0" +- [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") ++ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load1,load1,store1,store1") + (set_attr "length" "2,4,2,4,4,4,4,4,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") +@@ -486,12 +486,12 @@ + ) + + (define_insn_and_split "*thumb2_movsicc_insn" +- [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r") ++ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) +- (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r") +- (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))] ++ (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,I ,r,rI,K ,K,r") ++ (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,I,K ,rI,K,r")))] + "TARGET_THUMB2" + "@ + it\\t%D3\;mov%D3\\t%0, %2 +@@ -504,12 +504,14 @@ + # + # + # ++ # + #" + ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 +- ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 +- ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 +- ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 +- ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 ++ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 ++ ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 ++ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 ++ ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 ++ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + "&& reload_completed" + [(const_int 0)] + { +@@ -540,10 +542,30 @@ + operands[2]))); + DONE; + } +- [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") +- (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") ++ [(set_attr "length" "4,4,6,6,6,6,10,8,10,10,10,6") ++ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,no,yes") + (set_attr "conds" "use") +- (set_attr "type" "multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (if_then_else (match_operand 1 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (const_string "mvn_imm") ++ (if_then_else 
(match_operand 1 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (const_string "mvn_imm") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*thumb2_movsfcc_soft_insn" +@@ -1182,7 +1204,11 @@ + " + [(set_attr "predicable" "yes") + (set_attr "length" "2") +- (set_attr "type" "alu_sreg")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "alu_imm") ++ (const_string "alu_sreg")) ++ (const_string "alu_imm")])] + ) + + (define_insn "*thumb2_subsi_short" +@@ -1247,14 +1273,21 @@ + " + [(set_attr "conds" "set") + (set_attr "length" "2,2,4") +- (set_attr "type" "alu_sreg")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "alus_imm") ++ (const_string "alus_sreg")) ++ (const_string "alus_imm") ++ (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "alus_imm") ++ (const_string "alus_sreg"))])] + ) + + (define_insn "*thumb2_addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV +- (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r") +- (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r")) ++ (plus:SI (match_operand:SI 0 "s_register_operand" "l, r") ++ (match_operand:SI 1 "arm_add_operand" "lPv,rIL")) + (const_int 0)))] + "TARGET_THUMB2" + "* +@@ -1271,8 +1304,10 @@ + return \"cmn\\t%0, %1\"; + " + [(set_attr "conds" "set") +- (set_attr "length" "2,2,4,4") +- (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_sreg")] ++ (set_attr "length" "2,4") ++ (set (attr "type") (if_then_else (match_operand 1 "const_int_operand" "") ++ (const_string "alus_imm") ++ (const_string "alus_sreg")))] + ) + + (define_insn "*thumb2_mulsi_short" +--- a/src/gcc/config/arm/unknown-elf.h ++++ b/src/gcc/config/arm/unknown-elf.h +@@ -32,7 +32,9 @@ + #define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" + + #undef STARTFILE_SPEC +-#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC ++#define STARTFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ ++ UNKNOWN_ELF_STARTFILE_SPEC + + #define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s" + +@@ -80,7 +82,9 @@ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ +- fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \ ++ fprintf (FILE, "\t.space\t%d\n", SIZE ? (int) SIZE : 1); \ ++ fprintf (FILE, "\t.size\t%s, %d\n", \ ++ NAME, SIZE ? (int) SIZE : 1); \ + } \ + while (0) + +--- a/src/gcc/config/arm/vxworks.h ++++ b/src/gcc/config/arm/vxworks.h +@@ -40,7 +40,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + builtin_define ("CPU=ARMARCH5"); \ + else if (arm_arch4) \ + { \ +- if (thumb_code) \ ++ if (TARGET_THUMB) \ + builtin_define ("CPU=ARMARCH4_T"); \ + else \ + builtin_define ("CPU=ARMARCH4"); \ +--- a/src/gcc/config/c6x/c6x.c ++++ b/src/gcc/config/c6x/c6x.c +@@ -3532,7 +3532,7 @@ try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs, + best_reg = + find_rename_reg (this_head, super_class, &unavailable, old_reg, true); + +- regrename_do_replace (this_head, best_reg); ++ gcc_assert (regrename_do_replace (this_head, best_reg)); + + count_unit_reqs (new_reqs, head, PREV_INSN (tail)); + merge_unit_reqs (new_reqs); +@@ -3545,7 +3545,7 @@ try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs, + unit_req_imbalance (reqs), unit_req_imbalance (new_reqs)); + } + if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs)) +- regrename_do_replace (this_head, old_reg); ++ gcc_assert (regrename_do_replace (this_head, old_reg)); + else + memcpy (reqs, new_reqs, sizeof (unit_req_table)); + +--- a/src/gcc/config/glibc-stdint.h ++++ b/src/gcc/config/glibc-stdint.h +@@ -22,6 +22,12 @@ a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + ++/* Systems using musl libc should use this header and make sure ++ OPTION_MUSL is defined correctly before using the TYPE macros. */ ++#ifndef OPTION_MUSL ++#define OPTION_MUSL 0 ++#endif + -+/* This will be called by the spec parser in gcc.c when it sees -+ a %:local_cpu_detect(args) construct. Currently it will be called -+ with either "arch", "cpu" or "tune" as argument depending on if -+ -march=native, -mcpu=native or -mtune=native is to be substituted. -+ -+ It returns a string containing new command line parameters to be -+ put at the place of the above two options, depending on what CPU -+ this is executed. E.g. "-march=armv8-a" on a Cortex-A57 for -+ -march=native. If the routine can't detect a known processor, -+ the -march or -mtune option is discarded. -+ -+ For -mtune and -mcpu arguments it attempts to detect the CPU or -+ a big.LITTLE system. -+ ARGC and ARGV are set depending on the actual arguments given -+ in the spec. */ -+ -+const char * -+host_detect_local_cpu (int argc, const char **argv) -+{ -+ const char *arch_id = NULL; -+ const char *res = NULL; -+ static const int num_exts = ARRAY_SIZE (ext_to_feat_string); -+ char buf[128]; -+ FILE *f = NULL; -+ bool arch = false; -+ bool tune = false; -+ bool cpu = false; -+ unsigned int i = 0; -+ unsigned int core_idx = 0; -+ const char* imps[2] = { NULL, NULL }; -+ const char* cores[2] = { NULL, NULL }; -+ unsigned int n_cores = 0; -+ unsigned int n_imps = 0; -+ bool processed_exts = false; -+ const char *ext_string = ""; -+ -+ gcc_assert (argc); -+ -+ if (!argv[0]) -+ goto not_found; -+ -+ /* Are we processing -march, mtune or mcpu? */ -+ arch = strcmp (argv[0], "arch") == 0; -+ if (!arch) -+ tune = strcmp (argv[0], "tune") == 0; -+ -+ if (!arch && !tune) -+ cpu = strcmp (argv[0], "cpu") == 0; -+ -+ if (!arch && !tune && !cpu) -+ goto not_found; -+ -+ f = fopen ("/proc/cpuinfo", "r"); -+ -+ if (f == NULL) -+ goto not_found; -+ -+ /* Look through /proc/cpuinfo to determine the implementer -+ and then the part number that identifies a particular core. 
*/ -+ while (fgets (buf, sizeof (buf), f) != NULL) -+ { -+ if (strstr (buf, "implementer") != NULL) -+ { -+ for (i = 0; cpu_data[i].name != NULL; i++) -+ if (strstr (buf, cpu_data[i].implementer_id) != NULL -+ && !contains_string_p (imps, cpu_data[i].implementer_id)) -+ { -+ if (n_imps == 2) -+ goto not_found; -+ -+ imps[n_imps++] = cpu_data[i].implementer_id; -+ -+ break; -+ } -+ continue; -+ } -+ -+ if (strstr (buf, "part") != NULL) -+ { -+ for (i = 0; cpu_data[i].name != NULL; i++) -+ if (strstr (buf, cpu_data[i].part_no) != NULL -+ && !contains_string_p (cores, cpu_data[i].part_no)) -+ { -+ if (n_cores == 2) -+ goto not_found; -+ -+ cores[n_cores++] = cpu_data[i].part_no; -+ core_idx = i; -+ arch_id = cpu_data[i].arch; -+ break; -+ } -+ continue; -+ } -+ if (!tune && !processed_exts && strstr (buf, "Features") != NULL) -+ { -+ for (i = 0; i < num_exts; i++) -+ { -+ bool enabled = true; -+ char *p = NULL; -+ char *feat_string = concat (ext_to_feat_string[i].feat_string, NULL); -+ -+ p = strtok (feat_string, " "); -+ -+ while (p != NULL) -+ { -+ if (strstr (buf, p) == NULL) -+ { -+ enabled = false; -+ break; -+ } -+ p = strtok (NULL, " "); -+ } -+ ext_string = concat (ext_string, "+", enabled ? "" : "no", -+ ext_to_feat_string[i].ext, NULL); -+ } -+ processed_exts = true; -+ } -+ } -+ -+ fclose (f); -+ f = NULL; -+ -+ /* Weird cpuinfo format that we don't know how to handle. */ -+ if (n_cores == 0 || n_cores > 2 || n_imps != 1) -+ goto not_found; -+ -+ if (arch && !arch_id) -+ goto not_found; -+ -+ if (arch) -+ { -+ const char* arch_name = get_arch_name_from_id (arch_id); -+ -+ /* We got some arch indentifier that's not in aarch64-arches.def? */ -+ if (!arch_name) -+ goto not_found; -+ -+ res = concat ("-march=", arch_name, NULL); -+ } -+ /* We have big.LITTLE. */ -+ else if (n_cores == 2) -+ { -+ for (i = 0; cpu_data[i].name != NULL; i++) -+ { -+ if (strchr (cpu_data[i].part_no, '.') != NULL -+ && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0 -+ && valid_bL_string_p (cores, cpu_data[i].part_no)) -+ { -+ res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL); -+ break; -+ } -+ } -+ if (!res) -+ goto not_found; -+ } -+ /* The simple, non-big.LITTLE case. */ -+ else -+ { -+ if (strncmp (cpu_data[core_idx].implementer_id, imps[0], -+ strlen (imps[0]) - 1) != 0) -+ goto not_found; + #define SIG_ATOMIC_TYPE "int" + + #define INT8_TYPE "signed char" +@@ -43,12 +49,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") + + #define INT_FAST8_TYPE "signed char" +-#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") +-#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") ++#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") ++#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") + #define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int") + #define UINT_FAST8_TYPE "unsigned char" +-#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") +-#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") ++#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") ++#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") + #define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? 
"long unsigned int" : "long long unsigned int") + + #define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") +--- a/src/gcc/config/linux.h ++++ b/src/gcc/config/linux.h +@@ -32,10 +32,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) + #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) + #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) + #else + #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) + #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) + #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (linux_libc == LIBC_MUSL) + #endif + + #define GNU_USER_TARGET_OS_CPP_BUILTINS() \ +@@ -50,21 +54,25 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + } while (0) + + /* Determine which dynamic linker to use depending on whether GLIBC or +- uClibc or Bionic is the default C library and whether +- -muclibc or -mglibc or -mbionic has been passed to change the default. */ ++ uClibc or Bionic or musl is the default C library and whether ++ -muclibc or -mglibc or -mbionic or -mmusl has been passed to change ++ the default. */ + +-#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LD1, LD2, LD3) \ +- "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:" LD1 "}}" ++#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LIBC4, LD1, LD2, LD3, LD4) \ ++ "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:%{" LIBC4 ":" LD4 ";:" LD1 "}}}" + + #if DEFAULT_LIBC == LIBC_GLIBC +-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ +- CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", G, U, B) ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", "mmusl", G, U, B, M) + #elif DEFAULT_LIBC == LIBC_UCLIBC +-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ +- CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", U, G, B) ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", "mmusl", U, G, B, M) + #elif DEFAULT_LIBC == LIBC_BIONIC +-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ +- CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", B, G, U) ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", "mmusl", B, G, U, M) ++#elif DEFAULT_LIBC == LIBC_MUSL ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("mmusl", "mglibc", "muclibc", "mbionic", M, G, U, B) + #else + #error "Unsupported DEFAULT_LIBC" + #endif /* DEFAULT_LIBC */ +@@ -81,24 +89,100 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker" + #define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64" + #define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32" ++/* Should be redefined for each target that supports musl. 
*/ ++#define MUSL_DYNAMIC_LINKER "/dev/null" ++#define MUSL_DYNAMIC_LINKER32 "/dev/null" ++#define MUSL_DYNAMIC_LINKER64 "/dev/null" ++#define MUSL_DYNAMIC_LINKERX32 "/dev/null" + + #define GNU_USER_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \ +- BIONIC_DYNAMIC_LINKER) ++ BIONIC_DYNAMIC_LINKER, MUSL_DYNAMIC_LINKER) + #define GNU_USER_DYNAMIC_LINKER32 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \ +- BIONIC_DYNAMIC_LINKER32) ++ BIONIC_DYNAMIC_LINKER32, MUSL_DYNAMIC_LINKER32) + #define GNU_USER_DYNAMIC_LINKER64 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \ +- BIONIC_DYNAMIC_LINKER64) ++ BIONIC_DYNAMIC_LINKER64, MUSL_DYNAMIC_LINKER64) + #define GNU_USER_DYNAMIC_LINKERX32 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \ +- BIONIC_DYNAMIC_LINKERX32) ++ BIONIC_DYNAMIC_LINKERX32, MUSL_DYNAMIC_LINKERX32) + + /* Whether we have Bionic libc runtime */ + #undef TARGET_HAS_BIONIC + #define TARGET_HAS_BIONIC (OPTION_BIONIC) + ++/* musl avoids problematic includes by rearranging the include directories. ++ * Unfortunately, this is mostly duplicated from cppdefault.c */ ++#if DEFAULT_LIBC == LIBC_MUSL ++#define INCLUDE_DEFAULTS_MUSL_GPP \ ++ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, \ ++ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, \ ++ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, \ ++ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 }, \ ++ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, \ ++ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, + -+ res = concat ("-m", cpu ? "cpu" : "tune", "=", -+ cpu_data[core_idx].name, NULL); -+ } ++#ifdef LOCAL_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_LOCAL \ ++ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, \ ++ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 }, ++#else ++#define INCLUDE_DEFAULTS_MUSL_LOCAL ++#endif + -+ if (tune) -+ return res; ++#ifdef PREFIX_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_PREFIX \ ++ { PREFIX_INCLUDE_DIR, 0, 0, 1, 0, 0}, ++#else ++#define INCLUDE_DEFAULTS_MUSL_PREFIX ++#endif + -+ res = concat (res, ext_string, NULL); ++#ifdef CROSS_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_CROSS \ ++ { CROSS_INCLUDE_DIR, "GCC", 0, 0, 0, 0}, ++#else ++#define INCLUDE_DEFAULTS_MUSL_CROSS ++#endif + -+ return res; ++#ifdef TOOL_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_TOOL \ ++ { TOOL_INCLUDE_DIR, "BINUTILS", 0, 1, 0, 0}, ++#else ++#define INCLUDE_DEFAULTS_MUSL_TOOL ++#endif + -+not_found: -+ { -+ /* If detection fails we ignore the option. -+ Clean up and return empty string. 
*/ ++#ifdef NATIVE_SYSTEM_HEADER_DIR ++#define INCLUDE_DEFAULTS_MUSL_NATIVE \ ++ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 2 }, \ ++ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 0 }, ++#else ++#define INCLUDE_DEFAULTS_MUSL_NATIVE ++#endif + -+ if (f) -+ fclose (f); ++#if defined (CROSS_DIRECTORY_STRUCTURE) && !defined (TARGET_SYSTEM_ROOT) ++# undef INCLUDE_DEFAULTS_MUSL_LOCAL ++# define INCLUDE_DEFAULTS_MUSL_LOCAL ++# undef INCLUDE_DEFAULTS_MUSL_NATIVE ++# define INCLUDE_DEFAULTS_MUSL_NATIVE ++#else ++# undef INCLUDE_DEFAULTS_MUSL_CROSS ++# define INCLUDE_DEFAULTS_MUSL_CROSS ++#endif + -+ return ""; ++#undef INCLUDE_DEFAULTS ++#define INCLUDE_DEFAULTS \ ++ { \ ++ INCLUDE_DEFAULTS_MUSL_GPP \ ++ INCLUDE_DEFAULTS_MUSL_PREFIX \ ++ INCLUDE_DEFAULTS_MUSL_CROSS \ ++ INCLUDE_DEFAULTS_MUSL_TOOL \ ++ INCLUDE_DEFAULTS_MUSL_NATIVE \ ++ { GCC_INCLUDE_DIR, "GCC", 0, 1, 0, 0 }, \ ++ { 0, 0, 0, 0, 0, 0 } \ + } -+} ++#endif + ---- a/src//dev/null -+++ b/src/gcc/config/aarch64/x-aarch64 -@@ -0,0 +1,3 @@ -+driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.c \ -+ $(CONFIG_H) $(SYSTEM_H) -+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ---- a/src/gcc/config/alpha/linux.h -+++ b/src/gcc/config/alpha/linux.h -@@ -61,10 +61,14 @@ along with GCC; see the file COPYING3. If not see + #if (DEFAULT_LIBC == LIBC_UCLIBC) && defined (SINGLE_LIBC) /* uClinux */ + /* This is a *uclinux* target. We don't define below macros to normal linux + versions, because doing so would require *uclinux* targets to include +--- a/src/gcc/config/linux.opt ++++ b/src/gcc/config/linux.opt +@@ -28,5 +28,9 @@ Target Report RejectNegative Var(linux_libc,LIBC_GLIBC) Negative(muclibc) + Use GNU C library + + muclibc +-Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mbionic) ++Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mmusl) + Use uClibc C library ++ ++mmusl ++Target Report RejectNegative Var(linux_libc,LIBC_MUSL) Negative(mbionic) ++Use musl C library +--- a/src/gcc/config/mips/linux.h ++++ b/src/gcc/config/mips/linux.h +@@ -37,7 +37,13 @@ along with GCC; see the file COPYING3. If not see + #define UCLIBC_DYNAMIC_LINKERN32 \ + "%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}" + ++#undef MUSL_DYNAMIC_LINKER32 ++#define MUSL_DYNAMIC_LINKER32 "/lib/ld-musl-mips%{EL:el}%{msoft-float:-sf}.so.1" ++#undef MUSL_DYNAMIC_LINKER64 ++#define MUSL_DYNAMIC_LINKER64 "/lib/ld-musl-mips64%{EL:el}%{msoft-float:-sf}.so.1" ++#define MUSL_DYNAMIC_LINKERN32 "/lib/ld-musl-mipsn32%{EL:el}%{msoft-float:-sf}.so.1" ++ + #define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32" + #define GNU_USER_DYNAMIC_LINKERN32 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \ +- BIONIC_DYNAMIC_LINKERN32) ++ BIONIC_DYNAMIC_LINKERN32, MUSL_DYNAMIC_LINKERN32) +--- a/src/gcc/config/rs6000/linux.h ++++ b/src/gcc/config/rs6000/linux.h +@@ -30,10 +30,14 @@ #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) @@ -3226,5612 +12657,14406 @@ #endif /* Determine what functions are present at the runtime; ---- a/src/gcc/config/arm/aarch-common-protos.h -+++ b/src/gcc/config/arm/aarch-common-protos.h -@@ -102,6 +102,8 @@ struct mem_cost_table - const int storef; /* SFmode. */ - const int stored; /* DFmode. */ - const int store_unaligned; /* Extra for unaligned stores. */ -+ const int loadv; /* Vector load. */ -+ const int storev; /* Vector store. 
*/ - }; +--- a/src/gcc/config/rs6000/linux64.h ++++ b/src/gcc/config/rs6000/linux64.h +@@ -299,10 +299,14 @@ extern int dot_symbols; + #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) + #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) + #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) + #else + #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) + #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) + #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (linux_libc == LIBC_MUSL) + #endif - struct fp_cost_table ---- a/src/gcc/config/arm/aarch-cost-tables.h -+++ b/src/gcc/config/arm/aarch-cost-tables.h -@@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs = - 1, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (3), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -130,12 +132,12 @@ const struct cpu_cost_table cortexa53_extra_costs = - 0, /* arith. */ - 0, /* logical. */ - COSTS_N_INSNS (1), /* shift. */ -- COSTS_N_INSNS (2), /* shift_reg. */ -+ 0, /* shift_reg. */ - COSTS_N_INSNS (1), /* arith_shift. */ -- COSTS_N_INSNS (2), /* arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift_reg. */ - COSTS_N_INSNS (1), /* log_shift. */ -- COSTS_N_INSNS (2), /* log_shift_reg. */ -- 0, /* extend. */ -+ COSTS_N_INSNS (1), /* log_shift_reg. */ -+ COSTS_N_INSNS (1), /* extend. */ - COSTS_N_INSNS (1), /* extend_arith. */ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ -@@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - 0, /* storef. */ - 0, /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - 0, /* storef. */ - 0, /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs = - 0, /* storef. */ - 0, /* stored. */ - 0, /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. 
*/ - }, - { - /* FP SFmode */ ---- a/src/gcc/config/arm/arm-cores.def -+++ b/src/gcc/config/arm/arm-cores.def -@@ -158,7 +158,7 @@ ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex - ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7) - ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) - ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) --ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) -+ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4) + /* Determine what functions are present at the runtime; +--- a/src/gcc/configure ++++ b/src/gcc/configure +@@ -767,10 +767,6 @@ REPORT_BUGS_TEXI + REPORT_BUGS_TO + PKGVERSION + CONFIGURE_SPECS +-CROSS_SYSTEM_HEADER_DIR +-TARGET_SYSTEM_ROOT_DEFINE +-TARGET_SYSTEM_ROOT +-SYSROOT_CFLAGS_FOR_TARGET + enable_shared + enable_fixed_point + enable_decimal_float +@@ -809,6 +805,10 @@ LDFLAGS + CFLAGS + CC + GENINSRC ++CROSS_SYSTEM_HEADER_DIR ++TARGET_SYSTEM_ROOT_DEFINE ++TARGET_SYSTEM_ROOT ++SYSROOT_CFLAGS_FOR_TARGET + target_subdir + host_subdir + build_subdir +@@ -870,6 +870,9 @@ ac_user_opts=' + enable_option_checking + with_build_libsubdir + with_local_prefix ++with_native_system_header_dir ++with_build_sysroot ++with_sysroot + with_gxx_include_dir + with_cpp_install_dir + enable_generated_files_in_srcdir +@@ -896,9 +899,6 @@ enable_tls + enable_objc_gc + with_dwarf2 + enable_shared +-with_native_system_header_dir +-with_build_sysroot +-with_sysroot + with_specs + with_pkgversion + with_bugurl +@@ -1680,6 +1680,12 @@ Optional Packages: + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-build-libsubdir=DIR Directory where to find libraries for build system + --with-local-prefix=DIR specifies directory to put local include ++ --with-native-system-header-dir=dir ++ use dir as the directory to look for standard ++ system header files in. Defaults to /usr/include. ++ --with-build-sysroot=sysroot ++ use sysroot as the system root during the build ++ --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR + --with-gxx-include-dir=DIR + specifies directory to put g++ header files + --with-cpp-install-dir=DIR +@@ -1692,14 +1698,9 @@ Optional Packages: + --with-as arrange to use the specified as (full pathname) + --with-stabs arrange to use stabs instead of host debug format + --with-dwarf2 force the default debug format to be DWARF 2 +- --with-native-system-header-dir=dir +- use dir as the directory to look for standard +- system header files in. Defaults to /usr/include. 
+- --with-build-sysroot=sysroot +- use sysroot as the system root during the build +- --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR + --with-specs=SPECS add SPECS to driver command-line processing +- --with-pkgversion=PKG Use PKG in the version string in place of "GCC" ++ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro ++ GCC `cat $srcdir/LINARO-VERSION`" + --with-bugurl=URL Direct users to URL to report a bug + --with-multilib-list select multilibs (AArch64, SH and x86-64 only) + --with-gnu-ld assume the C compiler uses GNU ld default=no +@@ -3339,6 +3340,83 @@ if test x$local_prefix = x; then + local_prefix=/usr/local + fi - /* V7 big.LITTLE implementations */ - ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) ---- a/src/gcc/config/arm/arm-protos.h -+++ b/src/gcc/config/arm/arm-protos.h -@@ -66,10 +66,6 @@ extern rtx legitimize_tls_address (rtx, rtx); - extern bool arm_legitimate_address_p (machine_mode, rtx, bool); - extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int); - extern int thumb_legitimate_offset_p (machine_mode, HOST_WIDE_INT); --extern bool arm_legitimize_reload_address (rtx *, machine_mode, int, int, -- int); --extern rtx thumb_legitimize_reload_address (rtx *, machine_mode, int, int, -- int); - extern int thumb1_legitimate_address_p (machine_mode, rtx, int); - extern bool ldm_stm_operation_p (rtx, bool, machine_mode mode, - bool, bool); -@@ -257,13 +253,6 @@ struct cpu_vec_costs { ++ ++# Check whether --with-native-system-header-dir was given. ++if test "${with_native_system_header_dir+set}" = set; then : ++ withval=$with_native_system_header_dir; ++ case ${with_native_system_header_dir} in ++ yes|no) as_fn_error "bad value ${withval} given for --with-native-system-header-dir" "$LINENO" 5 ;; ++ /* | [A-Za-z]:[\\/]*) ;; ++ *) as_fn_error "--with-native-system-header-dir argument ${withval} must be an absolute directory" "$LINENO" 5 ;; ++ esac ++ configured_native_system_header_dir="${withval}" ++ ++else ++ configured_native_system_header_dir= ++fi ++ ++ ++ ++# Check whether --with-build-sysroot was given. ++if test "${with_build_sysroot+set}" = set; then : ++ withval=$with_build_sysroot; if test x"$withval" != x ; then ++ SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" ++ fi ++else ++ SYSROOT_CFLAGS_FOR_TARGET= ++fi ++ ++ ++ ++if test "x$prefix" = xNONE; then ++ test_prefix=/usr/local ++else ++ test_prefix=$prefix ++fi ++if test "x$exec_prefix" = xNONE; then ++ test_exec_prefix=$test_prefix ++else ++ test_exec_prefix=$exec_prefix ++fi ++ ++ ++# Check whether --with-sysroot was given. 
++if test "${with_sysroot+set}" = set; then : ++ withval=$with_sysroot; ++ case ${with_sysroot} in ++ /) ;; ++ */) with_sysroot=`echo $with_sysroot | sed 's,/$,,'` ;; ++ esac ++ case ${with_sysroot} in ++ yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; ++ *) TARGET_SYSTEM_ROOT=$with_sysroot ;; ++ esac ++ ++ TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' ++ CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' ++ ++ case ${TARGET_SYSTEM_ROOT} in ++ "${test_prefix}"|"${test_prefix}/"*|\ ++ "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ ++ '${prefix}'|'${prefix}/'*|\ ++ '${exec_prefix}'|'${exec_prefix}/'*) ++ t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" ++ TARGET_SYSTEM_ROOT_DEFINE="$t" ++ ;; ++ esac ++ ++else ++ ++ TARGET_SYSTEM_ROOT= ++ TARGET_SYSTEM_ROOT_DEFINE= ++ CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' ++ ++fi ++ ++ ++ ++ ++ + # Don't set gcc_gxx_include_dir to gxx_include_dir since that's only + # passed in by the toplevel make and thus we'd get different behavior + # depending on where we built the sources. +@@ -3372,7 +3450,9 @@ gcc_gxx_include_dir_add_sysroot=0 + if test "${with_sysroot+set}" = set; then + gcc_gxx_without_sysroot=`expr "${gcc_gxx_include_dir}" : "${with_sysroot}"'\(.*\)'` + if test "${gcc_gxx_without_sysroot}"; then +- gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ if test x${with_sysroot} != x/; then ++ gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ fi + gcc_gxx_include_dir_add_sysroot=1 + fi + fi +@@ -7269,79 +7349,6 @@ fi - struct cpu_cost_table; --enum arm_sched_autopref -- { -- ARM_SCHED_AUTOPREF_OFF, -- ARM_SCHED_AUTOPREF_RANK, -- ARM_SCHED_AUTOPREF_FULL -- }; + +-# Check whether --with-native-system-header-dir was given. +-if test "${with_native_system_header_dir+set}" = set; then : +- withval=$with_native_system_header_dir; +- case ${with_native_system_header_dir} in +- yes|no) as_fn_error "bad value ${withval} given for --with-native-system-header-dir" "$LINENO" 5 ;; +- /* | [A-Za-z]:[\\/]*) ;; +- *) as_fn_error "--with-native-system-header-dir argument ${withval} must be an absolute directory" "$LINENO" 5 ;; +- esac +- configured_native_system_header_dir="${withval}" - - /* Dump function ARM_PRINT_TUNE_INFO should be updated whenever this - structure is modified. */ +-else +- configured_native_system_header_dir= +-fi +- +- +- +-# Check whether --with-build-sysroot was given. +-if test "${with_build_sysroot+set}" = set; then : +- withval=$with_build_sysroot; if test x"$withval" != x ; then +- SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" +- fi +-else +- SYSROOT_CFLAGS_FOR_TARGET= +-fi +- +- +- +-if test "x$prefix" = xNONE; then +- test_prefix=/usr/local +-else +- test_prefix=$prefix +-fi +-if test "x$exec_prefix" = xNONE; then +- test_exec_prefix=$test_prefix +-else +- test_exec_prefix=$exec_prefix +-fi +- +- +-# Check whether --with-sysroot was given. 
+-if test "${with_sysroot+set}" = set; then : +- withval=$with_sysroot; +- case ${with_sysroot} in +- yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; +- *) TARGET_SYSTEM_ROOT=$with_sysroot ;; +- esac +- +- TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' +- CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' +- +- case ${TARGET_SYSTEM_ROOT} in +- "${test_prefix}"|"${test_prefix}/"*|\ +- "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ +- '${prefix}'|'${prefix}/'*|\ +- '${exec_prefix}'|'${exec_prefix}/'*) +- t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" +- TARGET_SYSTEM_ROOT_DEFINE="$t" +- ;; +- esac +- +-else +- +- TARGET_SYSTEM_ROOT= +- TARGET_SYSTEM_ROOT_DEFINE= +- CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' +- +-fi +- +- +- +- +- +- + # Check whether --with-specs was given. + if test "${with_specs+set}" = set; then : + withval=$with_specs; CONFIGURE_SPECS=$withval +@@ -7362,7 +7369,7 @@ if test "${with_pkgversion+set}" = set; then : + *) PKGVERSION="($withval) " ;; + esac + else +- PKGVERSION="(GCC) " ++ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " -@@ -272,39 +261,57 @@ struct tune_params - bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); - const struct cpu_cost_table *insn_extra_cost; - bool (*sched_adjust_cost) (rtx_insn *, rtx, rtx_insn *, int *); -+ int (*branch_cost) (bool, bool); -+ /* Vectorizer costs. */ -+ const struct cpu_vec_costs* vec_costs; - int constant_limit; - /* Maximum number of instructions to conditionalise. */ - int max_insns_skipped; -- int num_prefetch_slots; -- int l1_cache_size; -- int l1_cache_line_size; -- bool prefer_constant_pool; -- int (*branch_cost) (bool, bool); -+ /* Maximum number of instructions to inline calls to memset. */ -+ int max_insns_inline_memset; -+ /* Issue rate of the processor. */ -+ unsigned int issue_rate; -+ /* Explicit prefetch data. */ -+ struct -+ { -+ int num_slots; -+ int l1_cache_size; -+ int l1_cache_line_size; -+ } prefetch; -+ enum {PREF_CONST_POOL_FALSE, PREF_CONST_POOL_TRUE} -+ prefer_constant_pool: 1; - /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ -- bool prefer_ldrd_strd; -+ enum {PREF_LDRD_FALSE, PREF_LDRD_TRUE} prefer_ldrd_strd: 1; - /* The preference for non short cirtcuit operation when optimizing for - performance. The first element covers Thumb state and the second one - is for ARM state. */ -- bool logical_op_non_short_circuit[2]; -- /* Vectorizer costs. */ -- const struct cpu_vec_costs* vec_costs; -- /* Prefer Neon for 64-bit bitops. */ -- bool prefer_neon_for_64bits; -+ enum log_op_non_sc {LOG_OP_NON_SC_FALSE, LOG_OP_NON_SC_TRUE}; -+ log_op_non_sc logical_op_non_short_circuit_thumb: 1; -+ log_op_non_sc logical_op_non_short_circuit_arm: 1; - /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ -- bool disparage_flag_setting_t16_encodings; -- /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags -- would be set. */ -- bool disparage_partial_flag_setting_t16_encodings; -+ enum {DISPARAGE_FLAGS_NEITHER, DISPARAGE_FLAGS_PARTIAL, DISPARAGE_FLAGS_ALL} -+ disparage_flag_setting_t16_encodings: 2; -+ enum {PREF_NEON_64_FALSE, PREF_NEON_64_TRUE} prefer_neon_for_64bits: 1; - /* Prefer to inline string operations like memset by using Neon. */ -- bool string_ops_prefer_neon; -- /* Maximum number of instructions to inline calls to memset. 
*/ -- int max_insns_inline_memset; -- /* Bitfield encoding the fuseable pairs of instructions. */ -- unsigned int fuseable_ops; -+ enum {PREF_NEON_STRINGOPS_FALSE, PREF_NEON_STRINGOPS_TRUE} -+ string_ops_prefer_neon: 1; -+ /* Bitfield encoding the fuseable pairs of instructions. Use FUSE_OPS -+ in an initializer if multiple fusion operations are supported on a -+ target. */ -+ enum fuse_ops -+ { -+ FUSE_NOTHING = 0, -+ FUSE_MOVW_MOVT = 1 << 0 -+ } fuseable_ops: 1; - /* Depth of scheduling queue to check for L2 autoprefetcher. */ -- enum arm_sched_autopref sched_autopref; -+ enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL} -+ sched_autopref: 2; - }; + fi -+/* Smash multiple fusion operations into a type that can be used for an -+ initializer. */ -+#define FUSE_OPS(x) ((tune_params::fuse_ops) (x)) -+ - extern const struct tune_params *current_tune; - extern int vfp3_const_double_for_fract_bits (rtx); - /* return power of two from operand, otherwise 0. */ ---- a/src/gcc/config/arm/arm.c -+++ b/src/gcc/config/arm/arm.c -@@ -940,11 +940,13 @@ struct processors - }; +@@ -18162,7 +18169,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 18165 "configure" ++#line 18172 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -18268,7 +18275,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 18271 "configure" ++#line 18278 "configure" + #include "confdefs.h" + #if HAVE_DLFCN_H +@@ -27802,6 +27809,9 @@ if test "${gcc_cv_libc_provides_ssp+set}" = set; then : + else + gcc_cv_libc_provides_ssp=no + case "$target" in ++ *-*-musl*) ++ # All versions of musl provide stack protector ++ gcc_cv_libc_provides_ssp=yes;; + *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) + # glibc 2.4 and later provides __stack_chk_fail and + # either __stack_chk_guard, or TLS access to stack guard canary. +@@ -27834,6 +27844,7 @@ fi + # ) and for now + # simply assert that glibc does provide this, which is true for all + # realistically usable GNU/Hurd configurations. ++ # All supported versions of musl provide it as well + gcc_cv_libc_provides_ssp=yes;; + *-*-darwin* | *-*-freebsd*) + ac_fn_c_check_func "$LINENO" "__stack_chk_fail" "ac_cv_func___stack_chk_fail" +@@ -27930,6 +27941,9 @@ case "$target" in + gcc_cv_target_dl_iterate_phdr=no + fi + ;; ++ *-linux-musl*) ++ gcc_cv_target_dl_iterate_phdr=yes ++ ;; + esac --#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 --#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ -- prefetch_slots, \ -- l1_size, \ -- l1_line_size -+#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 } -+#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \ -+ { \ -+ num_slots, \ -+ l1_size, \ -+ l1_line_size \ -+ } + if test x$gcc_cv_target_dl_iterate_phdr = xyes; then +--- a/src/gcc/configure.ac ++++ b/src/gcc/configure.ac +@@ -121,6 +121,73 @@ if test x$local_prefix = x; then + local_prefix=/usr/local + fi - /* arm generic vectorizer costs. */ - static const -@@ -1027,7 +1029,9 @@ const struct cpu_cost_table cortexa9_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (1), /* storef. */ - COSTS_N_INSNS (1), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. 
*/ - }, - { - /* FP SFmode */ -@@ -1128,7 +1132,9 @@ const struct cpu_cost_table cortexa8_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (1), /* storef. */ - COSTS_N_INSNS (1), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1230,7 +1236,9 @@ const struct cpu_cost_table cortexa5_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (2), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1333,7 +1341,9 @@ const struct cpu_cost_table cortexa7_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (2), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1434,7 +1444,9 @@ const struct cpu_cost_table cortexa12_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (2), /* stored. */ -- 0 /* store_unaligned. */ -+ 0, /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1535,7 +1547,9 @@ const struct cpu_cost_table cortexa15_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - 0, /* storef. */ - 0, /* stored. */ -- 0 /* store_unaligned. */ -+ 0, /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1636,7 +1650,9 @@ const struct cpu_cost_table v7m_extra_costs = - 1, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (3), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1678,49 +1694,50 @@ const struct cpu_cost_table v7m_extra_costs = - } - }; ++AC_ARG_WITH([native-system-header-dir], ++ [ --with-native-system-header-dir=dir ++ use dir as the directory to look for standard ++ system header files in. 
Defaults to /usr/include.], ++[ ++ case ${with_native_system_header_dir} in ++ yes|no) AC_MSG_ERROR([bad value ${withval} given for --with-native-system-header-dir]) ;; ++ /* | [[A-Za-z]]:[[\\/]]*) ;; ++ *) AC_MSG_ERROR([--with-native-system-header-dir argument ${withval} must be an absolute directory]) ;; ++ esac ++ configured_native_system_header_dir="${withval}" ++], [configured_native_system_header_dir=]) ++ ++AC_ARG_WITH(build-sysroot, ++ [AS_HELP_STRING([--with-build-sysroot=sysroot], ++ [use sysroot as the system root during the build])], ++ [if test x"$withval" != x ; then ++ SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" ++ fi], ++ [SYSROOT_CFLAGS_FOR_TARGET=]) ++AC_SUBST(SYSROOT_CFLAGS_FOR_TARGET) ++ ++if test "x$prefix" = xNONE; then ++ test_prefix=/usr/local ++else ++ test_prefix=$prefix ++fi ++if test "x$exec_prefix" = xNONE; then ++ test_exec_prefix=$test_prefix ++else ++ test_exec_prefix=$exec_prefix ++fi ++ ++AC_ARG_WITH(sysroot, ++[AS_HELP_STRING([[--with-sysroot[=DIR]]], ++ [search for usr/lib, usr/include, et al, within DIR])], ++[ ++ case ${with_sysroot} in ++ /) ;; ++ */) with_sysroot=`echo $with_sysroot | sed 's,/$,,'` ;; ++ esac ++ case ${with_sysroot} in ++ yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; ++ *) TARGET_SYSTEM_ROOT=$with_sysroot ;; ++ esac ++ ++ TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' ++ CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' ++ ++ case ${TARGET_SYSTEM_ROOT} in ++ "${test_prefix}"|"${test_prefix}/"*|\ ++ "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ ++ '${prefix}'|'${prefix}/'*|\ ++ '${exec_prefix}'|'${exec_prefix}/'*) ++ t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" ++ TARGET_SYSTEM_ROOT_DEFINE="$t" ++ ;; ++ esac ++], [ ++ TARGET_SYSTEM_ROOT= ++ TARGET_SYSTEM_ROOT_DEFINE= ++ CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' ++]) ++AC_SUBST(TARGET_SYSTEM_ROOT) ++AC_SUBST(TARGET_SYSTEM_ROOT_DEFINE) ++AC_SUBST(CROSS_SYSTEM_HEADER_DIR) ++ + # Don't set gcc_gxx_include_dir to gxx_include_dir since that's only + # passed in by the toplevel make and thus we'd get different behavior + # depending on where we built the sources. +@@ -152,7 +219,9 @@ gcc_gxx_include_dir_add_sysroot=0 + if test "${with_sysroot+set}" = set; then + gcc_gxx_without_sysroot=`expr "${gcc_gxx_include_dir}" : "${with_sysroot}"'\(.*\)'` + if test "${gcc_gxx_without_sysroot}"; then +- gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ if test x${with_sysroot} != x/; then ++ gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ fi + gcc_gxx_include_dir_add_sysroot=1 + fi + fi +@@ -791,69 +860,6 @@ AC_ARG_ENABLE(shared, + ], [enable_shared=yes]) + AC_SUBST(enable_shared) + +-AC_ARG_WITH([native-system-header-dir], +- [ --with-native-system-header-dir=dir +- use dir as the directory to look for standard +- system header files in. 
Defaults to /usr/include.], +-[ +- case ${with_native_system_header_dir} in +- yes|no) AC_MSG_ERROR([bad value ${withval} given for --with-native-system-header-dir]) ;; +- /* | [[A-Za-z]]:[[\\/]]*) ;; +- *) AC_MSG_ERROR([--with-native-system-header-dir argument ${withval} must be an absolute directory]) ;; +- esac +- configured_native_system_header_dir="${withval}" +-], [configured_native_system_header_dir=]) +- +-AC_ARG_WITH(build-sysroot, +- [AS_HELP_STRING([--with-build-sysroot=sysroot], +- [use sysroot as the system root during the build])], +- [if test x"$withval" != x ; then +- SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" +- fi], +- [SYSROOT_CFLAGS_FOR_TARGET=]) +-AC_SUBST(SYSROOT_CFLAGS_FOR_TARGET) +- +-if test "x$prefix" = xNONE; then +- test_prefix=/usr/local +-else +- test_prefix=$prefix +-fi +-if test "x$exec_prefix" = xNONE; then +- test_exec_prefix=$test_prefix +-else +- test_exec_prefix=$exec_prefix +-fi +- +-AC_ARG_WITH(sysroot, +-[AS_HELP_STRING([[--with-sysroot[=DIR]]], +- [search for usr/lib, usr/include, et al, within DIR])], +-[ +- case ${with_sysroot} in +- yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; +- *) TARGET_SYSTEM_ROOT=$with_sysroot ;; +- esac +- +- TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' +- CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' +- +- case ${TARGET_SYSTEM_ROOT} in +- "${test_prefix}"|"${test_prefix}/"*|\ +- "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ +- '${prefix}'|'${prefix}/'*|\ +- '${exec_prefix}'|'${exec_prefix}/'*) +- t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" +- TARGET_SYSTEM_ROOT_DEFINE="$t" +- ;; +- esac +-], [ +- TARGET_SYSTEM_ROOT= +- TARGET_SYSTEM_ROOT_DEFINE= +- CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' +-]) +-AC_SUBST(TARGET_SYSTEM_ROOT) +-AC_SUBST(TARGET_SYSTEM_ROOT_DEFINE) +-AC_SUBST(CROSS_SYSTEM_HEADER_DIR) +- + AC_ARG_WITH(specs, + [AS_HELP_STRING([--with-specs=SPECS], + [add SPECS to driver command-line processing])], +@@ -862,7 +868,7 @@ AC_ARG_WITH(specs, + ) + AC_SUBST(CONFIGURE_SPECS) --#define ARM_FUSE_NOTHING (0) --#define ARM_FUSE_MOVW_MOVT (1 << 0) -- - const struct tune_params arm_slowmul_tune = - { - arm_slowmul_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 3, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. 
*/ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +-ACX_PKGVERSION([GCC]) ++ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) + ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - const struct tune_params arm_fastmul_tune = - { - arm_fastmul_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + # Sanity check enable_languages in case someone does not run the toplevel +@@ -5282,6 +5288,9 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, + gcc_cv_libc_provides_ssp, + [gcc_cv_libc_provides_ssp=no + case "$target" in ++ *-*-musl*) ++ # All versions of musl provide stack protector ++ gcc_cv_libc_provides_ssp=yes;; + *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) + # glibc 2.4 and later provides __stack_chk_fail and + # either __stack_chk_guard, or TLS access to stack guard canary. +@@ -5308,6 +5317,7 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, + # ) and for now + # simply assert that glibc does provide this, which is true for all + # realistically usable GNU/Hurd configurations. ++ # All supported versions of musl provide it as well + gcc_cv_libc_provides_ssp=yes;; + *-*-darwin* | *-*-freebsd*) + AC_CHECK_FUNC(__stack_chk_fail,[gcc_cv_libc_provides_ssp=yes], +@@ -5381,6 +5391,9 @@ case "$target" in + gcc_cv_target_dl_iterate_phdr=no + fi + ;; ++ *-linux-musl*) ++ gcc_cv_target_dl_iterate_phdr=yes ++ ;; + esac + GCC_TARGET_TEMPLATE([TARGET_DL_ITERATE_PHDR]) + if test x$gcc_cv_target_dl_iterate_phdr = xyes; then +--- a/src/gcc/cp/Make-lang.in ++++ b/src/gcc/cp/Make-lang.in +@@ -155,7 +155,7 @@ check-c++-subtargets : check-g++-subtargets + # List of targets that can use the generic check- rule and its // variant. + lang_checks += check-g++ + lang_checks_parallelized += check-g++ +-# For description see comment above check_gcc_parallelize in gcc/Makefile.in. ++# For description see the check_$lang_parallelize comment in gcc/Makefile.in. 
+ check_g++_parallelize = 10000 + # + # Install hooks: +@@ -221,6 +221,7 @@ c++.mostlyclean: + -rm -f doc/g++.1 + -rm -f cp/*$(objext) + -rm -f cp/*$(coverageexts) ++ -rm -f xg++$(exeext) g++-cross$(exeext) cc1plus$(exeext) + c++.clean: + c++.distclean: + -rm -f cp/config.status cp/Makefile +--- a/src/gcc/cppbuiltin.c ++++ b/src/gcc/cppbuiltin.c +@@ -62,18 +62,41 @@ parse_basever (int *major, int *minor, int *patchlevel) + *patchlevel = s_patchlevel; + } + ++/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]" ++ to create Linaro release number YYYYMM and spin version. */ ++static void ++parse_linarover (int *release, int *spin) ++{ ++ static int s_year = -1, s_month, s_spin; ++ ++ if (s_year == -1) ++ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3) ++ { ++ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month); ++ s_spin = 0; ++ } ++ ++ if (release) ++ *release = s_year * 100 + s_month; ++ ++ if (spin) ++ *spin = s_spin; ++} - /* StrongARM has early execution of branches, so a sequence that is worth -@@ -1729,233 +1746,279 @@ const struct tune_params arm_fastmul_tune = - const struct tune_params arm_strongarm_tune = + /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */ + static void + define__GNUC__ (cpp_reader *pfile) { - arm_fastmul_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 3, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +- int major, minor, patchlevel; ++ int major, minor, patchlevel, linaro_release, linaro_spin; - const struct tune_params arm_xscale_tune = - { - arm_xscale_rtx_costs, -- NULL, -+ NULL, /* Insn extra costs. */ - xscale_sched_adjust_cost, -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 2, /* Constant limit. */ - 3, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. 
*/ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + parse_basever (&major, &minor, &patchlevel); ++ parse_linarover (&linaro_release, &linaro_spin); + cpp_define_formatted (pfile, "__GNUC__=%d", major); + cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); + cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); + cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); ++ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release); ++ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin); + cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); + cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); + cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); +--- a/src/gcc/cprop.c ++++ b/src/gcc/cprop.c +@@ -285,6 +285,15 @@ cprop_constant_p (const_rtx x) + return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x)); + } - const struct tune_params arm_9e_tune = - { - arm_9e_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF -+}; ++/* Determine whether the rtx X should be treated as a register that can ++ be propagated. Any pseudo-register is fine. */ + -+const struct tune_params arm_marvell_pj4_tune = ++static bool ++cprop_reg_p (const_rtx x) +{ -+ arm_9e_rtx_costs, -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ - arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ &arm_default_vec_cost, -+ 1, /* Constant limit. */ -+ 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; ++ return REG_P (x) && !HARD_REGISTER_P (x); ++} ++ + /* Scan SET present in INSN and add an entry to the hash TABLE. + IMPLICIT is true if it's an implicit set, false otherwise. */ + +@@ -295,8 +304,7 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, + rtx src = SET_SRC (set); + rtx dest = SET_DEST (set); + +- if (REG_P (dest) +- && ! 
HARD_REGISTER_P (dest) ++ if (cprop_reg_p (dest) + && reg_available_p (dest, insn) + && can_copy_p (GET_MODE (dest))) + { +@@ -321,9 +329,8 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, + src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src); + + /* Record sets for constant/copy propagation. */ +- if ((REG_P (src) ++ if ((cprop_reg_p (src) + && src != dest +- && ! HARD_REGISTER_P (src) + && reg_available_p (src, insn)) + || cprop_constant_p (src)) + insert_set_in_table (dest, src, insn, table, implicit); +@@ -758,12 +765,37 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) + int success = 0; + rtx set = single_set (insn); + ++ bool check_rtx_costs = true; ++ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); ++ int old_cost = set ? set_rtx_cost (set, speed) : 0; ++ ++ if ((note != 0 ++ && REG_NOTE_KIND (note) == REG_EQUAL ++ && (GET_CODE (XEXP (note, 0)) == CONST ++ || CONSTANT_P (XEXP (note, 0)))) ++ || (set && CONSTANT_P (SET_SRC (set)))) ++ check_rtx_costs = false; ++ + /* Usually we substitute easy stuff, so we won't copy everything. + We however need to take care to not duplicate non-trivial CONST + expressions. */ + to = copy_rtx (to); + + validate_replace_src_group (from, to, insn); ++ ++ /* If TO is a constant, check the cost of the set after propagation ++ to the cost of the set before the propagation. If the cost is ++ higher, then do not replace FROM with TO. */ ++ ++ if (check_rtx_costs ++ && CONSTANT_P (to) ++ && (set_rtx_cost (set, speed) > old_cost)) ++ { ++ cancel_changes (0); ++ return false; ++ } ++ ++ + if (num_changes_pending () && apply_change_group ()) + success = 1; + +@@ -821,15 +853,15 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) + return success; + } + +-/* Find a set of REGNOs that are available on entry to INSN's block. Return +- NULL no such set is found. */ ++/* Find a set of REGNOs that are available on entry to INSN's block. If found, ++ SET_RET[0] will be assigned a set with a register source and SET_RET[1] a ++ set with a constant source. If not found the corresponding entry is set to ++ NULL. */ + +-static struct cprop_expr * +-find_avail_set (int regno, rtx_insn *insn) ++static void ++find_avail_set (int regno, rtx_insn *insn, struct cprop_expr *set_ret[2]) + { +- /* SET1 contains the last set found that can be returned to the caller for +- use in a substitution. */ +- struct cprop_expr *set1 = 0; ++ set_ret[0] = set_ret[1] = NULL; + + /* Loops are not possible here. To get a loop we would need two sets + available at the start of the block containing INSN. i.e. we would +@@ -869,8 +901,10 @@ find_avail_set (int regno, rtx_insn *insn) + If the source operand changed, we may still use it for the next + iteration of this loop, but we may not use it for substitutions. */ + +- if (cprop_constant_p (src) || reg_not_set_p (src, insn)) +- set1 = set; ++ if (cprop_constant_p (src)) ++ set_ret[1] = set; ++ else if (reg_not_set_p (src, insn)) ++ set_ret[0] = set; + + /* If the source of the set is anything except a register, then + we have reached the end of the copy chain. */ +@@ -881,10 +915,6 @@ find_avail_set (int regno, rtx_insn *insn) + and see if we have an available copy into SRC. */ + regno = REGNO (src); + } +- +- /* SET1 holds the last set that was available and anticipatable at +- INSN. 
*/ +- return set1; + } + + /* Subroutine of cprop_insn that tries to propagate constants into +@@ -1050,40 +1080,40 @@ cprop_insn (rtx_insn *insn) + int changed = 0, changed_this_round; + rtx note; + +-retry: +- changed_this_round = 0; +- reg_use_count = 0; +- note_uses (&PATTERN (insn), find_used_regs, NULL); +- +- /* We may win even when propagating constants into notes. */ +- note = find_reg_equal_equiv_note (insn); +- if (note) +- find_used_regs (&XEXP (note, 0), NULL); +- +- for (i = 0; i < reg_use_count; i++) ++ do + { +- rtx reg_used = reg_use_table[i]; +- unsigned int regno = REGNO (reg_used); +- rtx src; +- struct cprop_expr *set; ++ changed_this_round = 0; ++ reg_use_count = 0; ++ note_uses (&PATTERN (insn), find_used_regs, NULL); + +- /* If the register has already been set in this block, there's +- nothing we can do. */ +- if (! reg_not_set_p (reg_used, insn)) +- continue; ++ /* We may win even when propagating constants into notes. */ ++ note = find_reg_equal_equiv_note (insn); ++ if (note) ++ find_used_regs (&XEXP (note, 0), NULL); - const struct tune_params arm_v6t2_tune = - { - arm_9e_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +- /* Find an assignment that sets reg_used and is available +- at the start of the block. */ +- set = find_avail_set (regno, insn); +- if (! set) +- continue; ++ for (i = 0; i < reg_use_count; i++) ++ { ++ rtx reg_used = reg_use_table[i]; ++ unsigned int regno = REGNO (reg_used); ++ rtx src_cst = NULL, src_reg = NULL; ++ struct cprop_expr *set[2]; + +- src = set->src; ++ /* If the register has already been set in this block, there's ++ nothing we can do. */ ++ if (! reg_not_set_p (reg_used, insn)) ++ continue; +- /* Constant propagation. */ +- if (cprop_constant_p (src)) +- { +- if (constprop_register (reg_used, src, insn)) ++ /* Find an assignment that sets reg_used and is available ++ at the start of the block. */ ++ find_avail_set (regno, insn, set); ++ if (set[0]) ++ src_reg = set[0]->src; ++ if (set[1]) ++ src_cst = set[1]->src; + - /* Generic Cortex tuning. Use more specific tunings if appropriate. */ - const struct tune_params arm_cortex_tune = - { - arm_9e_rtx_costs, - &generic_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. 
*/ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; ++ /* Constant propagation. */ ++ if (src_cst && cprop_constant_p (src_cst) ++ && constprop_register (reg_used, src_cst, insn)) + { + changed_this_round = changed = 1; + global_const_prop_count++; +@@ -1093,18 +1123,16 @@ retry: + "GLOBAL CONST-PROP: Replacing reg %d in ", regno); + fprintf (dump_file, "insn %d with constant ", + INSN_UID (insn)); +- print_rtl (dump_file, src); ++ print_rtl (dump_file, src_cst); + fprintf (dump_file, "\n"); + } + if (insn->deleted ()) + return 1; + } +- } +- else if (REG_P (src) +- && REGNO (src) >= FIRST_PSEUDO_REGISTER +- && REGNO (src) != regno) +- { +- if (try_replace_reg (reg_used, src, insn)) ++ /* Copy propagation. */ ++ else if (src_reg && cprop_reg_p (src_reg) ++ && REGNO (src_reg) != regno ++ && try_replace_reg (reg_used, src_reg, insn)) + { + changed_this_round = changed = 1; + global_copy_prop_count++; +@@ -1113,7 +1141,7 @@ retry: + fprintf (dump_file, + "GLOBAL COPY-PROP: Replacing reg %d in insn %d", + regno, INSN_UID (insn)); +- fprintf (dump_file, " with reg %d\n", REGNO (src)); ++ fprintf (dump_file, " with reg %d\n", REGNO (src_reg)); + } - const struct tune_params arm_cortex_a8_tune = - { - arm_9e_rtx_costs, - &cortexa8_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + /* The original insn setting reg_used may or may not now be +@@ -1123,12 +1151,10 @@ retry: + and made things worse. */ + } + } +- +- /* If try_replace_reg simplified the insn, the regs found +- by find_used_regs may not be valid anymore. Start over. 
*/ +- if (changed_this_round) +- goto retry; + } ++ /* If try_replace_reg simplified the insn, the regs found by find_used_regs ++ may not be valid anymore. Start over. */ ++ while (changed_this_round); - const struct tune_params arm_cortex_a7_tune = - { - arm_9e_rtx_costs, - &cortexa7_extra_costs, -- NULL, -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + if (changed && DEBUG_INSN_P (insn)) + return 0; +@@ -1191,7 +1217,7 @@ do_local_cprop (rtx x, rtx_insn *insn) + /* Rule out USE instructions and ASM statements as we don't want to + change the hard registers mentioned. */ + if (REG_P (x) +- && (REGNO (x) >= FIRST_PSEUDO_REGISTER ++ && (cprop_reg_p (x) + || (GET_CODE (PATTERN (insn)) != USE + && asm_noperands (PATTERN (insn)) < 0))) + { +@@ -1207,7 +1233,7 @@ do_local_cprop (rtx x, rtx_insn *insn) - const struct tune_params arm_cortex_a15_tune = - { - arm_9e_rtx_costs, - &cortexa15_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 3, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_FULL - }; + if (cprop_constant_p (this_rtx)) + newcnst = this_rtx; +- if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER ++ if (cprop_reg_p (this_rtx) + /* Don't copy propagate if it has attached REG_EQUIV note. 
+ At this point this only function parameters should have + REG_EQUIV notes and if the argument slot is used somewhere +@@ -1328,9 +1354,8 @@ implicit_set_cond_p (const_rtx cond) + if (GET_CODE (cond) != EQ && GET_CODE (cond) != NE) + return false; - const struct tune_params arm_cortex_a53_tune = - { - arm_9e_rtx_costs, - &cortexa53_extra_costs, -- NULL, /* Scheduler cost adjustment. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_OFF - }; +- /* The first operand of COND must be a pseudo-reg. */ +- if (! REG_P (XEXP (cond, 0)) +- || HARD_REGISTER_P (XEXP (cond, 0))) ++ /* The first operand of COND must be a register we can propagate. */ ++ if (!cprop_reg_p (XEXP (cond, 0))) + return false; - const struct tune_params arm_cortex_a57_tune = - { - arm_9e_rtx_costs, - &cortexa57_extra_costs, -- NULL, /* Scheduler cost adjustment. */ -- 1, /* Constant limit. */ -- 2, /* Max cond insns. */ -- ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -+ NULL, /* Sched adj cost. */ - arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ -+ &arm_default_vec_cost, -+ 1, /* Constant limit. */ -+ 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 3, /* Issue rate. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_FULL - }; + /* The second operand of COND must be a suitable constant. */ +--- a/src/gcc/cse.c ++++ b/src/gcc/cse.c +@@ -4540,14 +4540,49 @@ cse_insn (rtx_insn *insn) + canonicalize_insn (insn, &sets, n_sets); + + /* If this insn has a REG_EQUAL note, store the equivalent value in SRC_EQV, +- if different, or if the DEST is a STRICT_LOW_PART. The latter condition +- is necessary because SRC_EQV is handled specially for this case, and if +- it isn't set, then there will be no equivalence for the destination. 
*/ ++ if different, or if the DEST is a STRICT_LOW_PART/ZERO_EXTRACT. The ++ latter condition is necessary because SRC_EQV is handled specially for ++ this case, and if it isn't set, then there will be no equivalence ++ for the destination. */ + if (n_sets == 1 && REG_NOTES (insn) != 0 + && (tem = find_reg_note (insn, REG_EQUAL, NULL_RTX)) != 0 +- && (! rtx_equal_p (XEXP (tem, 0), SET_SRC (sets[0].rtl)) +- || GET_CODE (SET_DEST (sets[0].rtl)) == STRICT_LOW_PART)) +- src_eqv = copy_rtx (XEXP (tem, 0)); ++ && (! rtx_equal_p (XEXP (tem, 0), SET_SRC (sets[0].rtl)))) ++ { ++ if (GET_CODE (SET_DEST (sets[0].rtl)) == STRICT_LOW_PART) ++ src_eqv = copy_rtx (XEXP (tem, 0)); ++ ++ /* If DEST is of the form ZERO_EXTACT, as in: ++ (set (zero_extract:SI (reg:SI 119) ++ (const_int 16 [0x10]) ++ (const_int 16 [0x10])) ++ (const_int 51154 [0xc7d2])) ++ REG_EQUAL note will specify the value of register (reg:SI 119) at this ++ point. Note that this is different from SRC_EQV. We can however ++ calculate SRC_EQV with the position and width of ZERO_EXTRACT. */ ++ else if (GET_CODE (SET_DEST (sets[0].rtl)) == ZERO_EXTRACT ++ && CONST_INT_P (XEXP (tem, 0)) ++ && CONST_INT_P (XEXP (SET_DEST (sets[0].rtl), 1)) ++ && CONST_INT_P (XEXP (SET_DEST (sets[0].rtl), 2))) ++ { ++ rtx dest_reg = XEXP (SET_DEST (sets[0].rtl), 0); ++ rtx width = XEXP (SET_DEST (sets[0].rtl), 1); ++ rtx pos = XEXP (SET_DEST (sets[0].rtl), 2); ++ HOST_WIDE_INT val = INTVAL (XEXP (tem, 0)); ++ HOST_WIDE_INT mask; ++ unsigned int shift; ++ if (BITS_BIG_ENDIAN) ++ shift = GET_MODE_PRECISION (GET_MODE (dest_reg)) ++ - INTVAL (pos) - INTVAL (width); ++ else ++ shift = INTVAL (pos); ++ if (INTVAL (width) == HOST_BITS_PER_WIDE_INT) ++ mask = ~(HOST_WIDE_INT) 0; ++ else ++ mask = ((HOST_WIDE_INT) 1 << INTVAL (width)) - 1; ++ val = (val >> shift) & mask; ++ src_eqv = GEN_INT (val); ++ } ++ } - const struct tune_params arm_xgene1_tune = + /* Set sets[i].src_elt to the class each source belongs to. + Detect assignments from or to volatile things +--- a/src/gcc/df-core.c ++++ b/src/gcc/df-core.c +@@ -642,7 +642,6 @@ void + df_finish_pass (bool verify ATTRIBUTE_UNUSED) { - arm_9e_rtx_costs, - &xgene1_extra_costs, -- NULL, /* Scheduler cost adjustment. */ -- 1, /* Constant limit. */ -- 2, /* Max cond insns. */ -- ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -+ NULL, /* Sched adj cost. */ - arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 32, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ &arm_default_vec_cost, -+ 1, /* Constant limit. */ -+ 2, /* Max cond insns. */ -+ 32, /* Memset max inline. */ -+ 4, /* Issue rate. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. 
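For the cse.c hunk that derives SRC_EQV when the SET_DEST is a ZERO_EXTRACT: the value recorded for the field is just the REG_EQUAL value shifted to the field's position and masked to its width. A standalone sketch of that arithmetic with the numbers from the comment (little-endian bit numbering, i.e. the !BITS_BIG_ENDIAN branch; illustrative only, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* Value of the WIDTH-bit field starting at bit POS of VAL.  */
    static uint32_t
    field_value (uint32_t val, unsigned int pos, unsigned int width)
    {
      uint32_t mask = width >= 32 ? ~UINT32_C (0)
                                  : (UINT32_C (1) << width) - 1;
      return (val >> pos) & mask;
    }

    int
    main (void)
    {
      /* A 16-bit field at bit 16 written with 51154 (0xc7d2): if the whole
         register is known to equal 0xc7d20000, the field equals 0xc7d2.  */
      printf ("0x%x\n", (unsigned int) field_value (0xc7d20000u, 16, 16));
      return 0;
    }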
*/ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + int i; +- int removed = 0; - /* Branches can be dual-issued on Cortex-A5, so conditional execution is -@@ -1965,21 +2028,23 @@ const struct tune_params arm_cortex_a5_tune = - { - arm_9e_rtx_costs, - &cortexa5_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_cortex_a5_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 1, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_cortex_a5_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {false, false}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + #ifdef ENABLE_DF_CHECKING + int saved_flags; +@@ -658,21 +657,15 @@ df_finish_pass (bool verify ATTRIBUTE_UNUSED) + saved_flags = df->changeable_flags; + #endif - const struct tune_params arm_cortex_a9_tune = -@@ -1987,41 +2052,45 @@ const struct tune_params arm_cortex_a9_tune = - arm_9e_rtx_costs, - &cortexa9_extra_costs, - cortex_a9_sched_adjust_cost, -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_BENEFICIAL(4,32,32), -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +- for (i = 0; i < df->num_problems_defined; i++) ++ /* We iterate over problems by index as each problem removed will ++ lead to problems_in_order to be reordered. */ ++ for (i = 0; i < DF_LAST_PROBLEM_PLUS1; i++) + { +- struct dataflow *dflow = df->problems_in_order[i]; +- struct df_problem *problem = dflow->problem; ++ struct dataflow *dflow = df->problems_by_index[i]; - const struct tune_params arm_cortex_a12_tune = - { - arm_9e_rtx_costs, - &cortexa12_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, /* Vectorizer costs. 
*/ - 1, /* Constant limit. */ - 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_OFF - }; +- if (dflow->optional_p) +- { +- gcc_assert (problem->remove_problem_fun); +- (problem->remove_problem_fun) (); +- df->problems_in_order[i] = NULL; +- df->problems_by_index[problem->id] = NULL; +- removed++; +- } ++ if (dflow && dflow->optional_p) ++ df_remove_problem (dflow); + } +- df->num_problems_defined -= removed; + + /* Clear all of the flags. */ + df->changeable_flags = 0; +--- a/src/gcc/emit-rtl.c ++++ b/src/gcc/emit-rtl.c +@@ -5234,7 +5234,8 @@ set_for_reg_notes (rtx insn) + reg = SET_DEST (pat); + + /* Notes apply to the contents of a STRICT_LOW_PART. */ +- if (GET_CODE (reg) == STRICT_LOW_PART) ++ if (GET_CODE (reg) == STRICT_LOW_PART ++ || GET_CODE (reg) == ZERO_EXTRACT) + reg = XEXP (reg, 0); + + /* Check that we have a register. */ +--- a/src/gcc/expr.c ++++ b/src/gcc/expr.c +@@ -7725,15 +7725,7 @@ expand_expr_addr_expr_1 (tree exp, rtx target, machine_mode tmode, + marked TREE_ADDRESSABLE, which will be either a front-end + or a tree optimizer bug. */ + +- if (TREE_ADDRESSABLE (exp) +- && ! MEM_P (result) +- && ! targetm.calls.allocate_stack_slots_for_args ()) +- { +- error ("local frame unavailable (naked function?)"); +- return result; +- } +- else +- gcc_assert (MEM_P (result)); ++ gcc_assert (MEM_P (result)); + result = XEXP (result, 0); + + /* ??? Is this needed anymore? */ +--- a/src/gcc/fortran/Make-lang.in ++++ b/src/gcc/fortran/Make-lang.in +@@ -167,7 +167,7 @@ check-f95-subtargets : check-gfortran-subtargets + check-fortran-subtargets : check-gfortran-subtargets + lang_checks += check-gfortran + lang_checks_parallelized += check-gfortran +-# For description see comment above check_gcc_parallelize in gcc/Makefile.in. ++# For description see the check_$lang_parallelize comment in gcc/Makefile.in. + check_gfortran_parallelize = 10000 - /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single -@@ -2035,21 +2104,23 @@ const struct tune_params arm_v7m_tune = - { - arm_9e_rtx_costs, - &v7m_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_cortex_m_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_cortex_m_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {false, false}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. 
*/ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + # GFORTRAN documentation. +@@ -275,7 +275,7 @@ fortran.uninstall: + # We just have to delete files specific to us. - /* Cortex-M7 tuning. */ -@@ -2058,21 +2129,23 @@ const struct tune_params arm_cortex_m7_tune = - { - arm_9e_rtx_costs, - &v7m_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_cortex_m7_branch_cost, -+ &arm_default_vec_cost, - 0, /* Constant limit. */ - 1, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_cortex_m7_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + fortran.mostlyclean: +- -rm -f f951$(exeext) ++ -rm -f gfortran$(exeext) gfortran-cross$(exeext) f951$(exeext) + -rm -f fortran/*.o - /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than -@@ -2080,43 +2153,47 @@ const struct tune_params arm_cortex_m7_tune = - const struct tune_params arm_v6m_tune = + fortran.clean: +--- a/src/gcc/function.c ++++ b/src/gcc/function.c +@@ -227,6 +227,7 @@ free_after_compilation (struct function *f) + f->eh = NULL; + f->machine = NULL; + f->cfg = NULL; ++ f->curr_properties &= ~PROP_cfg; + + regno_reg_rtx = NULL; + } +@@ -2121,9 +2122,6 @@ aggregate_value_p (const_tree exp, const_tree fntype) + bool + use_register_for_decl (const_tree decl) { - arm_9e_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, /* Vectorizer costs. */ - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {false, false}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. 
*/ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF +- if (!targetm.calls.allocate_stack_slots_for_args ()) +- return true; +- + /* Honor volatile. */ + if (TREE_SIDE_EFFECTS (decl)) + return false; +@@ -2151,6 +2149,9 @@ use_register_for_decl (const_tree decl) + if (flag_float_store && FLOAT_TYPE_P (TREE_TYPE (decl))) + return false; + ++ if (!targetm.calls.allocate_stack_slots_for_args ()) ++ return true; ++ + /* If we're not interested in tracking debugging information for + this decl, then we can certainly put it in a register. */ + if (DECL_IGNORED_P (decl)) +--- a/src/gcc/genmatch.c ++++ b/src/gcc/genmatch.c +@@ -2922,7 +2922,12 @@ parser::parse_operation () + + user_id *p = dyn_cast (op); + if (p && p->is_oper_list) +- record_operlist (id_tok->src_loc, p); ++ { ++ if (active_fors.length() == 0) ++ record_operlist (id_tok->src_loc, p); ++ else ++ fatal_at (id_tok, "operator-list %s cannot be exapnded inside 'for'", id); ++ } + return op; + } + +@@ -3338,8 +3343,13 @@ parser::parse_for (source_location) + "others with arity %d", oper, idb->nargs, arity); + + user_id *p = dyn_cast (idb); +- if (p && p->is_oper_list) +- op->substitutes.safe_splice (p->substitutes); ++ if (p) ++ { ++ if (p->is_oper_list) ++ op->substitutes.safe_splice (p->substitutes); ++ else ++ fatal_at (token, "iterator cannot be used as operator-list"); ++ } + else + op->substitutes.safe_push (idb); + } +@@ -3436,6 +3446,11 @@ parser::parse_operator_list (source_location) + op->substitutes.safe_push (idb); + } + ++ // Check that there is no junk after id-list ++ token = peek(); ++ if (token->type != CPP_CLOSE_PAREN) ++ fatal_at (token, "expected identifier got %s", cpp_type2name (token->type, 0)); ++ + if (op->substitutes.length () == 0) + fatal_at (token, "operator-list cannot be empty"); + +--- a/src/gcc/genpreds.c ++++ b/src/gcc/genpreds.c +@@ -640,12 +640,14 @@ struct constraint_data + const char *regclass; /* for register constraints */ + rtx exp; /* for other constraints */ + unsigned int lineno; /* line of definition */ +- unsigned int is_register : 1; +- unsigned int is_const_int : 1; +- unsigned int is_const_dbl : 1; +- unsigned int is_extra : 1; +- unsigned int is_memory : 1; +- unsigned int is_address : 1; ++ unsigned int is_register : 1; ++ unsigned int is_const_int : 1; ++ unsigned int is_const_dbl : 1; ++ unsigned int is_extra : 1; ++ unsigned int is_memory : 1; ++ unsigned int is_address : 1; ++ unsigned int maybe_allows_reg : 1; ++ unsigned int maybe_allows_mem : 1; }; - const struct tune_params arm_fa726te_tune = - { - arm_9e_rtx_costs, -- NULL, -+ NULL, /* Insn extra costs. */ - fa726te_sched_adjust_cost, -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. 
*/ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + /* Overview of all constraints beginning with a given letter. */ +@@ -691,6 +693,9 @@ static unsigned int satisfied_start; + static unsigned int const_int_start, const_int_end; + static unsigned int memory_start, memory_end; + static unsigned int address_start, address_end; ++static unsigned int maybe_allows_none_start, maybe_allows_none_end; ++static unsigned int maybe_allows_reg_start, maybe_allows_reg_end; ++static unsigned int maybe_allows_mem_start, maybe_allows_mem_end; + + /* Convert NAME, which contains angle brackets and/or underscores, to + a string that can be used as part of a C identifier. The string +@@ -711,6 +716,34 @@ mangle (const char *name) + return XOBFINISH (rtl_obstack, const char *); + } + ++/* Return a bitmask, bit 1 if EXP maybe allows a REG/SUBREG, 2 if EXP ++ maybe allows a MEM. Bits should be clear only when we are sure it ++ will not allow a REG/SUBREG or a MEM. */ ++static int ++compute_maybe_allows (rtx exp) ++{ ++ switch (GET_CODE (exp)) ++ { ++ case IF_THEN_ELSE: ++ /* Conservative answer is like IOR, of the THEN and ELSE branches. */ ++ return compute_maybe_allows (XEXP (exp, 1)) ++ | compute_maybe_allows (XEXP (exp, 2)); ++ case AND: ++ return compute_maybe_allows (XEXP (exp, 0)) ++ & compute_maybe_allows (XEXP (exp, 1)); ++ case IOR: ++ return compute_maybe_allows (XEXP (exp, 0)) ++ | compute_maybe_allows (XEXP (exp, 1)); ++ case MATCH_CODE: ++ if (*XSTR (exp, 1) == '\0') ++ return (strstr (XSTR (exp, 0), "reg") != NULL ? 1 : 0) ++ | (strstr (XSTR (exp, 0), "mem") != NULL ? 2 : 0); ++ /* FALLTHRU */ ++ default: ++ return 3; ++ } ++} ++ + /* Add one constraint, of any sort, to the tables. 
NAME is its name; + REGCLASS is the register class, if any; EXP is the expression to + test, if any; IS_MEMORY and IS_ADDRESS indicate memory and address +@@ -866,6 +899,11 @@ add_constraint (const char *name, const char *regclass, + c->is_extra = !(regclass || is_const_int || is_const_dbl); + c->is_memory = is_memory; + c->is_address = is_address; ++ int maybe_allows = 3; ++ if (exp) ++ maybe_allows = compute_maybe_allows (exp); ++ c->maybe_allows_reg = (maybe_allows & 1) != 0; ++ c->maybe_allows_mem = (maybe_allows & 2) != 0; + + c->next_this_letter = *slot; + *slot = c; +@@ -940,8 +978,30 @@ choose_enum_order (void) + enum_order[next++] = c; + address_end = next; + ++ maybe_allows_none_start = next; ++ FOR_ALL_CONSTRAINTS (c) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && !c->maybe_allows_reg && !c->maybe_allows_mem) ++ enum_order[next++] = c; ++ maybe_allows_none_end = next; ++ ++ maybe_allows_reg_start = next; ++ FOR_ALL_CONSTRAINTS (c) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && c->maybe_allows_reg && !c->maybe_allows_mem) ++ enum_order[next++] = c; ++ maybe_allows_reg_end = next; ++ ++ maybe_allows_mem_start = next; ++ FOR_ALL_CONSTRAINTS (c) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && !c->maybe_allows_reg && c->maybe_allows_mem) ++ enum_order[next++] = c; ++ maybe_allows_mem_end = next; ++ + FOR_ALL_CONSTRAINTS (c) +- if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && c->maybe_allows_reg && c->maybe_allows_mem) + enum_order[next++] = c; + gcc_assert (next == num_constraints); + } +@@ -1229,6 +1289,41 @@ write_range_function (const char *name, unsigned int start, unsigned int end) + "}\n\n", name); + } + ++/* Write a definition for insn_extra_constraint_allows_reg_mem function. */ ++static void ++write_allows_reg_mem_function (void) ++{ ++ printf ("static inline void\n" ++ "insn_extra_constraint_allows_reg_mem (enum constraint_num c,\n" ++ "\t\t\t\t bool *allows_reg, bool *allows_mem)\n" ++ "{\n"); ++ if (maybe_allows_none_start != maybe_allows_none_end) ++ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" ++ " return;\n", ++ enum_order[maybe_allows_none_start]->c_name, ++ enum_order[maybe_allows_none_end - 1]->c_name); ++ if (maybe_allows_reg_start != maybe_allows_reg_end) ++ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" ++ " {\n" ++ " *allows_reg = true;\n" ++ " return;\n" ++ " }\n", ++ enum_order[maybe_allows_reg_start]->c_name, ++ enum_order[maybe_allows_reg_end - 1]->c_name); ++ if (maybe_allows_mem_start != maybe_allows_mem_end) ++ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" ++ " {\n" ++ " *allows_mem = true;\n" ++ " return;\n" ++ " }\n", ++ enum_order[maybe_allows_mem_start]->c_name, ++ enum_order[maybe_allows_mem_end - 1]->c_name); ++ printf (" (void) c;\n" ++ " *allows_reg = true;\n" ++ " *allows_mem = true;\n" ++ "}\n\n"); ++} ++ + /* VEC is a list of key/value pairs, with the keys being lower bounds + of a range. Output a decision tree that handles the keys covered by + [VEC[START], VEC[END]), returning FALLBACK for keys lower then VEC[START]'s. 
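The genpreds.c addition classifies each extra constraint by whether its definition could ever accept a register or a memory operand, so the generated insn_extra_constraint_allows_reg_mem can answer that question without walking the constraint's RTL at run time. compute_maybe_allows does the classification by folding a reg/mem bitmask over the constraint expression; a self-contained caricature of that fold (illustrative only, toy data structure rather than RTL, and it omits the IF_THEN_ELSE case the real code handles):

    #include <string.h>

    enum kind { K_AND, K_IOR, K_MATCH_CODE, K_OTHER };

    struct cexpr
    {
      enum kind kind;
      const char *codes;             /* K_MATCH_CODE: e.g. "reg subreg"  */
      const struct cexpr *op0, *op1; /* K_AND / K_IOR operands           */
    };

    /* Bit 0: may allow a reg, bit 1: may allow a mem; 3 when unsure.  */
    static int
    maybe_allows (const struct cexpr *e)
    {
      switch (e->kind)
        {
        case K_AND:
          return maybe_allows (e->op0) & maybe_allows (e->op1);
        case K_IOR:
          return maybe_allows (e->op0) | maybe_allows (e->op1);
        case K_MATCH_CODE:
          return (strstr (e->codes, "reg") != NULL ? 1 : 0)
                 | (strstr (e->codes, "mem") != NULL ? 2 : 0);
        default:
          return 3;                  /* conservative: both possible */
        }
    }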
+@@ -1326,6 +1421,7 @@ write_tm_preds_h (void) + memory_start, memory_end); + write_range_function ("insn_extra_address_constraint", + address_start, address_end); ++ write_allows_reg_mem_function (); + + if (constraint_max_namelen > 1) + { +--- a/src/gcc/go/Make-lang.in ++++ b/src/gcc/go/Make-lang.in +@@ -197,6 +197,7 @@ go.uninstall: + go.mostlyclean: + -rm -f go/*$(objext) + -rm -f go/*$(coverageexts) ++ -rm -f gccgo$(exeext) gccgo-cross$(exeext) go1$(exeext) + go.clean: + go.distclean: + go.maintainer-clean: +--- a/src/gcc/ifcvt.c ++++ b/src/gcc/ifcvt.c +@@ -1046,7 +1046,7 @@ cc_in_cond (rtx cond) + + /* Return sequence of instructions generated by if conversion. This + function calls end_sequence() to end the current stream, ensures +- that are instructions are unshared, recognizable non-jump insns. ++ that the instructions are unshared, recognizable non-jump insns. + On failure, this function returns a NULL_RTX. */ + static rtx_insn * +--- a/src/gcc/ira-costs.c ++++ b/src/gcc/ira-costs.c +@@ -1380,8 +1380,6 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref) + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); -@@ -3140,31 +3217,33 @@ arm_option_override (void) - && abi_version_at_least(2)) - flag_strict_volatile_bitfields = 1; +- dest = SET_DEST (set); +- src = SET_SRC (set); + if (GET_CODE (dest) == SUBREG + && (GET_MODE_SIZE (GET_MODE (dest)) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))) +--- a/src/gcc/jit/Make-lang.in ++++ b/src/gcc/jit/Make-lang.in +@@ -285,6 +285,10 @@ jit.uninstall: + # We just have to delete files specific to us. -- /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed -- it beneficial (signified by setting num_prefetch_slots to 1 or more.) */ -+ /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we -+ have deemed it beneficial (signified by setting -+ prefetch.num_slots to 1 or more). */ - if (flag_prefetch_loop_arrays < 0 - && HAVE_prefetch - && optimize >= 3 -- && current_tune->num_prefetch_slots > 0) -+ && current_tune->prefetch.num_slots > 0) - flag_prefetch_loop_arrays = 1; + jit.mostlyclean: ++ -rm -f $(LIBGCCJIT_FILENAME) $(LIBGCCJIT_SYMLINK) ++ -rm -f $(LIBGCCJIT_LINKER_NAME_SYMLINK) $(FULL_DRIVER_NAME) ++ -rm -f $(LIBGCCJIT_SONAME) ++ -rm -f $(jit_OBJS) -- /* Set up parameters to be used in prefetching algorithm. Do not override the -- defaults unless we are tuning for a core we have researched values for. */ -- if (current_tune->num_prefetch_slots > 0) -+ /* Set up parameters to be used in prefetching algorithm. Do not -+ override the defaults unless we are tuning for a core we have -+ researched values for. 
*/ -+ if (current_tune->prefetch.num_slots > 0) - maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, -- current_tune->num_prefetch_slots, -- global_options.x_param_values, -- global_options_set.x_param_values); -- if (current_tune->l1_cache_line_size >= 0) -+ current_tune->prefetch.num_slots, -+ global_options.x_param_values, -+ global_options_set.x_param_values); -+ if (current_tune->prefetch.l1_cache_line_size >= 0) - maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, -- current_tune->l1_cache_line_size, -- global_options.x_param_values, -- global_options_set.x_param_values); -- if (current_tune->l1_cache_size >= 0) -+ current_tune->prefetch.l1_cache_line_size, -+ global_options.x_param_values, -+ global_options_set.x_param_values); -+ if (current_tune->prefetch.l1_cache_size >= 0) - maybe_set_param_value (PARAM_L1_CACHE_SIZE, -- current_tune->l1_cache_size, -- global_options.x_param_values, -- global_options_set.x_param_values); -+ current_tune->prefetch.l1_cache_size, -+ global_options.x_param_values, -+ global_options_set.x_param_values); + jit.clean: - /* Use Neon to perform 64-bits operations rather than core - registers. */ -@@ -3174,24 +3253,35 @@ arm_option_override (void) +--- a/src/gcc/loop-invariant.c ++++ b/src/gcc/loop-invariant.c +@@ -696,6 +696,8 @@ find_defs (struct loop *loop) + df_remove_problem (df_chain); + df_process_deferred_rescans (); + df_chain_add_problem (DF_UD_CHAIN); ++ df_live_add_problem (); ++ df_live_set_all_dirty (); + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_analyze_loop (loop); + check_invariant_table_size (); +@@ -740,8 +742,11 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on, + enough to not regress 410.bwaves either (by still moving reg+reg + invariants). + See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */ +- inv->cheap_address = address_cost (SET_SRC (set), word_mode, +- ADDR_SPACE_GENERIC, speed) < 3; ++ if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set)))) ++ inv->cheap_address = address_cost (SET_SRC (set), word_mode, ++ ADDR_SPACE_GENERIC, speed) < 3; ++ else ++ inv->cheap_address = false; + } + else + { +@@ -1174,6 +1179,7 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed, + } - /* Use the alternative scheduling-pressure algorithm by default. */ - maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, -- global_options.x_param_values, -- global_options_set.x_param_values); -+ global_options.x_param_values, -+ global_options_set.x_param_values); + if (!inv->cheap_address ++ || inv->def->n_uses == 0 + || inv->def->n_addr_uses < inv->def->n_uses) + (*comp_cost) += inv->cost * inv->eqno; - /* Look through ready list and all of queue for instructions - relevant for L2 auto-prefetcher. */ - int param_sched_autopref_queue_depth; -- if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF) -- param_sched_autopref_queue_depth = -1; -- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK) -- param_sched_autopref_queue_depth = 0; -- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL) -- param_sched_autopref_queue_depth = max_insn_queue_index + 1; -- else -- gcc_unreachable (); +@@ -1512,6 +1518,79 @@ replace_uses (struct invariant *inv, rtx reg, bool in_group) + return 1; + } + ++/* Whether invariant INV setting REG can be moved out of LOOP, at the end of ++ the block preceding its header. 
*/ + -+ switch (current_tune->sched_autopref) ++static bool ++can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg) ++{ ++ df_ref def, use; ++ unsigned int dest_regno, defs_in_loop_count = 0; ++ rtx_insn *insn = inv->insn; ++ basic_block bb = BLOCK_FOR_INSN (inv->insn); ++ ++ /* We ignore hard register and memory access for cost and complexity reasons. ++ Hard register are few at this stage and expensive to consider as they ++ require building a separate data flow. Memory access would require using ++ df_simulate_* and can_move_insns_across functions and is more complex. */ ++ if (!REG_P (reg) || HARD_REGISTER_P (reg)) ++ return false; ++ ++ /* Check whether the set is always executed. We could omit this condition if ++ we know that the register is unused outside of the loop, but it does not ++ seem worth finding out. */ ++ if (!inv->always_executed) ++ return false; ++ ++ /* Check that all uses that would be dominated by def are already dominated ++ by it. */ ++ dest_regno = REGNO (reg); ++ for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use)) + { -+ case tune_params::SCHED_AUTOPREF_OFF: -+ param_sched_autopref_queue_depth = -1; -+ break; ++ rtx_insn *use_insn; ++ basic_block use_bb; + -+ case tune_params::SCHED_AUTOPREF_RANK: -+ param_sched_autopref_queue_depth = 0; -+ break; ++ use_insn = DF_REF_INSN (use); ++ use_bb = BLOCK_FOR_INSN (use_insn); + -+ case tune_params::SCHED_AUTOPREF_FULL: -+ param_sched_autopref_queue_depth = max_insn_queue_index + 1; -+ break; ++ /* Ignore instruction considered for moving. */ ++ if (use_insn == insn) ++ continue; + -+ default: -+ gcc_unreachable (); ++ /* Don't consider uses outside loop. */ ++ if (!flow_bb_inside_loop_p (loop, use_bb)) ++ continue; ++ ++ /* Don't move if a use is not dominated by def in insn. */ ++ if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn)) ++ return false; ++ if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb)) ++ return false; ++ } ++ ++ /* Check for other defs. Any other def in the loop might reach a use ++ currently reached by the def in insn. */ ++ for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def)) ++ { ++ basic_block def_bb = DF_REF_BB (def); ++ ++ /* Defs in exit block cannot reach a use they weren't already. */ ++ if (single_succ_p (def_bb)) ++ { ++ basic_block def_bb_succ; ++ ++ def_bb_succ = single_succ (def_bb); ++ if (!flow_bb_inside_loop_p (loop, def_bb_succ)) ++ continue; ++ } ++ ++ if (++defs_in_loop_count > 1) ++ return false; + } + - maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, - param_sched_autopref_queue_depth, -- global_options.x_param_values, -- global_options_set.x_param_values); -+ global_options.x_param_values, -+ global_options_set.x_param_values); ++ return true; ++} ++ + /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false + otherwise. */ - /* Disable shrink-wrap when optimizing function for size, since it tends to - generate additional returns. */ -@@ -7946,236 +8036,6 @@ thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode) - return x; - } +@@ -1545,11 +1624,8 @@ move_invariant_reg (struct loop *loop, unsigned invno) + } + } --bool --arm_legitimize_reload_address (rtx *p, -- machine_mode mode, -- int opnum, int type, -- int ind_levels ATTRIBUTE_UNUSED) --{ -- /* We must recognize output that we have already generated ourselves. 
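The loop-invariant.c change lets an invariant set be moved to the loop preheader as-is when can_move_invariant_reg proves the destination is a pseudo whose single in-loop definition is always executed and dominates every in-loop use; only otherwise is a fresh temporary introduced. A source-level caricature of the easy case (illustrative only, not part of the patch; in practice earlier tree-level passes may already have hoisted code this simple):

    int
    sum_scaled (const int *a, int n, int b, int c)
    {
      int s = 0;
      for (int i = 0; i < n; i++)
        {
          int k = b * c + 1;   /* invariant, single def, dominates its use:
                                  movable to the preheader without a copy */
          s += a[i] * k;
        }
      return s;
    }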
*/ -- if (GET_CODE (*p) == PLUS -- && GET_CODE (XEXP (*p, 0)) == PLUS -- && REG_P (XEXP (XEXP (*p, 0), 0)) -- && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) -- && CONST_INT_P (XEXP (*p, 1))) -- { -- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, -- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), -- VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return true; -- } -- -- if (GET_CODE (*p) == PLUS -- && REG_P (XEXP (*p, 0)) -- && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) -- /* If the base register is equivalent to a constant, let the generic -- code handle it. Otherwise we will run into problems if a future -- reload pass decides to rematerialize the constant. */ -- && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0))) -- && CONST_INT_P (XEXP (*p, 1))) -- { -- HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); -- HOST_WIDE_INT low, high; -- -- /* Detect coprocessor load/stores. */ -- bool coproc_p = ((TARGET_HARD_FLOAT -- && TARGET_VFP -- && (mode == SFmode || mode == DFmode)) -- || (TARGET_REALLY_IWMMXT -- && VALID_IWMMXT_REG_MODE (mode)) -- || (TARGET_NEON -- && (VALID_NEON_DREG_MODE (mode) -- || VALID_NEON_QREG_MODE (mode)))); -- -- /* For some conditions, bail out when lower two bits are unaligned. */ -- if ((val & 0x3) != 0 -- /* Coprocessor load/store indexes are 8-bits + '00' appended. */ -- && (coproc_p -- /* For DI, and DF under soft-float: */ -- || ((mode == DImode || mode == DFmode) -- /* Without ldrd, we use stm/ldm, which does not -- fair well with unaligned bits. */ -- && (! TARGET_LDRD -- /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ -- || TARGET_THUMB2)))) -- return false; -- -- /* When breaking down a [reg+index] reload address into [(reg+high)+low], -- of which the (reg+high) gets turned into a reload add insn, -- we try to decompose the index into high/low values that can often -- also lead to better reload CSE. -- For example: -- ldr r0, [r2, #4100] // Offset too large -- ldr r1, [r2, #4104] // Offset too large -- -- is best reloaded as: -- add t1, r2, #4096 -- ldr r0, [t1, #4] -- add t2, r2, #4096 -- ldr r1, [t2, #8] -- -- which post-reload CSE can simplify in most cases to eliminate the -- second add instruction: -- add t1, r2, #4096 -- ldr r0, [t1, #4] -- ldr r1, [t1, #8] -- -- The idea here is that we want to split out the bits of the constant -- as a mask, rather than as subtracting the maximum offset that the -- respective type of load/store used can handle. -- -- When encountering negative offsets, we can still utilize it even if -- the overall offset is positive; sometimes this may lead to an immediate -- that can be constructed with fewer instructions. -- For example: -- ldr r0, [r2, #0x3FFFFC] -- -- This is best reloaded as: -- add t1, r2, #0x400000 -- ldr r0, [t1, #-4] -- -- The trick for spotting this for a load insn with N bits of offset -- (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a -- negative offset that is going to make bit N and all the bits below -- it become zero in the remainder part. -- -- The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect -- to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), -- used in most cases of ARM load/store instructions. */ -- --#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ -- (((VAL) & ((1 << (N)) - 1)) \ -- ? 
(((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ -- : 0) -- -- if (coproc_p) -- { -- low = SIGN_MAG_LOW_ADDR_BITS (val, 10); -- -- /* NEON quad-word load/stores are made of two double-word accesses, -- so the valid index range is reduced by 8. Treat as 9-bit range if -- we go over it. */ -- if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) -- low = SIGN_MAG_LOW_ADDR_BITS (val, 9); -- } -- else if (GET_MODE_SIZE (mode) == 8) -- { -- if (TARGET_LDRD) -- low = (TARGET_THUMB2 -- ? SIGN_MAG_LOW_ADDR_BITS (val, 10) -- : SIGN_MAG_LOW_ADDR_BITS (val, 8)); -- else -- /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) -- to access doublewords. The supported load/store offsets are -- -8, -4, and 4, which we try to produce here. */ -- low = ((val & 0xf) ^ 0x8) - 0x8; -- } -- else if (GET_MODE_SIZE (mode) < 8) -- { -- /* NEON element load/stores do not have an offset. */ -- if (TARGET_NEON_FP16 && mode == HFmode) -- return false; -- -- if (TARGET_THUMB2) -- { -- /* Thumb-2 has an asymmetrical index range of (-256,4096). -- Try the wider 12-bit range first, and re-try if the result -- is out of range. */ -- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); -- if (low < -255) -- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); -- } -- else -- { -- if (mode == HImode || mode == HFmode) -- { -- if (arm_arch4) -- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); -- else -- { -- /* The storehi/movhi_bytes fallbacks can use only -- [-4094,+4094] of the full ldrb/strb index range. */ -- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); -- if (low == 4095 || low == -4095) -- return false; -- } -- } -- else -- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); -- } -- } -- else -- return false; -- -- high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) -- ^ (unsigned HOST_WIDE_INT) 0x80000000) -- - (unsigned HOST_WIDE_INT) 0x80000000); -- /* Check for overflow or zero */ -- if (low == 0 || high == 0 || (high + low != val)) -- return false; -- -- /* Reload the high part into a base reg; leave the low part -- in the mem. -- Note that replacing this gen_rtx_PLUS with plus_constant is -- wrong in this case because we rely on the -- (plus (plus reg c1) c2) structure being preserved so that -- XEXP (*p, 0) in push_reload below uses the correct term. */ -- *p = gen_rtx_PLUS (GET_MODE (*p), -- gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), -- GEN_INT (high)), -- GEN_INT (low)); -- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, -- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), -- VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return true; -- } -- -- return false; --} -- --rtx --thumb_legitimize_reload_address (rtx *x_p, -- machine_mode mode, -- int opnum, int type, -- int ind_levels ATTRIBUTE_UNUSED) --{ -- rtx x = *x_p; -- -- if (GET_CODE (x) == PLUS -- && GET_MODE_SIZE (mode) < 4 -- && REG_P (XEXP (x, 0)) -- && XEXP (x, 0) == stack_pointer_rtx -- && CONST_INT_P (XEXP (x, 1)) -- && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) -- { -- rtx orig_x = x; -- -- x = copy_rtx (x); -- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), -- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return x; -- } -- -- /* If both registers are hi-regs, then it's better to reload the -- entire expression rather than each register individually. That -- only requires one reload register rather than two. 
*/ -- if (GET_CODE (x) == PLUS -- && REG_P (XEXP (x, 0)) -- && REG_P (XEXP (x, 1)) -- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) -- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) -- { -- rtx orig_x = x; -- -- x = copy_rtx (x); -- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), -- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return x; -- } -- -- return NULL; --} -- - /* Return TRUE if X contains any TLS symbol references. */ +- /* Move the set out of the loop. If the set is always executed (we could +- omit this condition if we know that the register is unused outside of +- the loop, but it does not seem worth finding out) and it has no uses +- that would not be dominated by it, we may just move it (TODO). +- Otherwise we need to create a temporary register. */ ++ /* If possible, just move the set out of the loop. Otherwise, we ++ need to create a temporary register. */ + set = single_set (inv->insn); + reg = dest = SET_DEST (set); + if (GET_CODE (reg) == SUBREG) +@@ -1557,20 +1633,27 @@ move_invariant_reg (struct loop *loop, unsigned invno) + if (REG_P (reg)) + regno = REGNO (reg); + +- reg = gen_reg_rtx_and_attrs (dest); ++ if (!can_move_invariant_reg (loop, inv, dest)) ++ { ++ reg = gen_reg_rtx_and_attrs (dest); - bool -@@ -9399,7 +9259,8 @@ static bool - arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) - { - const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; -- gcc_assert (GET_CODE (x) == UNSPEC); -+ rtx_code code = GET_CODE (x); -+ gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE); +- /* Try replacing the destination by a new pseudoregister. */ +- validate_change (inv->insn, &SET_DEST (set), reg, true); ++ /* Try replacing the destination by a new pseudoregister. */ ++ validate_change (inv->insn, &SET_DEST (set), reg, true); - switch (XINT (x, 1)) - { -@@ -9445,7 +9306,7 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) - *cost = COSTS_N_INSNS (2); - break; - } -- return false; -+ return true; - } +- /* As well as all the dominated uses. */ +- replace_uses (inv, reg, true); ++ /* As well as all the dominated uses. */ ++ replace_uses (inv, reg, true); - /* Cost of a libcall. We assume one insn per argument, an amount for the -@@ -11008,6 +10869,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, - *cost = LIBCALL_COST (1); - return false; +- /* And validate all the changes. */ +- if (!apply_change_group ()) +- goto fail; ++ /* And validate all the changes. */ ++ if (!apply_change_group ()) ++ goto fail; -+ case UNSPEC_VOLATILE: - case UNSPEC: - return arm_unspec_cost (x, outer_code, speed_p, cost); +- emit_insn_after (gen_move_insn (dest, reg), inv->insn); ++ emit_insn_after (gen_move_insn (dest, reg), inv->insn); ++ } ++ else if (dump_file) ++ fprintf (dump_file, "Invariant %d moved without introducing a new " ++ "temporary register\n", invno); + reorder_insns (inv->insn, inv->insn, BB_END (preheader)); ++ df_recompute_luids (preheader); -@@ -17287,14 +17149,16 @@ thumb2_reorg (void) + /* If there is a REG_EQUAL note on the insn we just moved, and the + insn is in a basic block that is not always executed or the note +--- a/src/gcc/lra-constraints.c ++++ b/src/gcc/lra-constraints.c +@@ -1656,8 +1656,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, + { + HARD_REG_SET temp; + +- // ??? 
Is this assert right +- // lra_assert (hard_reg_set_subset_p (set, reg_class_contents[rclass])); ++ lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set)); + COPY_HARD_REG_SET (temp, set); + AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs); + return (hard_reg_set_subset_p +--- a/src/gcc/objc/Make-lang.in ++++ b/src/gcc/objc/Make-lang.in +@@ -114,6 +114,7 @@ objc.uninstall: + objc.mostlyclean: + -rm -f objc/*$(objext) objc/xforward objc/fflags + -rm -f objc/*$(coverageexts) ++ -rm -f cc1obj$(exeext) + objc.clean: objc.mostlyclean + -rm -rf objc-headers + objc.distclean: +--- a/src/gcc/objcp/Make-lang.in ++++ b/src/gcc/objcp/Make-lang.in +@@ -142,6 +142,7 @@ obj-c++.uninstall: + obj-c++.mostlyclean: + -rm -f objcp/*$(objext) + -rm -f objcp/*$(coverageexts) ++ -rm -f cc1objplus$(exeext) + obj-c++.clean: obj-c++.mostlyclean + obj-c++.distclean: + -rm -f objcp/config.status objcp/Makefile +--- a/src/gcc/optabs.c ++++ b/src/gcc/optabs.c +@@ -6544,18 +6544,28 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1, + { + struct expand_operand ops[2]; + rtx rtx_op0, rtx_op1; ++ machine_mode m0, m1; + enum rtx_code rcode = get_rtx_code (tcode, unsignedp); - FOR_EACH_BB_FN (bb, cfun) - { -- if (current_tune->disparage_flag_setting_t16_encodings -+ if ((current_tune->disparage_flag_setting_t16_encodings -+ == tune_params::DISPARAGE_FLAGS_ALL) - && optimize_bb_for_speed_p (bb)) - continue; + gcc_assert (TREE_CODE_CLASS (tcode) == tcc_comparison); - rtx_insn *insn; - Convert_Action action = SKIP; - Convert_Action action_for_partial_flag_setting -- = (current_tune->disparage_partial_flag_setting_t16_encodings -+ = ((current_tune->disparage_flag_setting_t16_encodings -+ != tune_params::DISPARAGE_FLAGS_NEITHER) - && optimize_bb_for_speed_p (bb)) - ? SKIP : CONV; +- /* Expand operands. */ ++ /* Expand operands. For vector types with scalar modes, e.g. where int64x1_t ++ has mode DImode, this can produce a constant RTX of mode VOIDmode; in such ++ cases, use the original mode. 
*/ + rtx_op0 = expand_expr (t_op0, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op0)), + EXPAND_STACK_PARM); ++ m0 = GET_MODE (rtx_op0); ++ if (m0 == VOIDmode) ++ m0 = TYPE_MODE (TREE_TYPE (t_op0)); ++ + rtx_op1 = expand_expr (t_op1, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op1)), + EXPAND_STACK_PARM); ++ m1 = GET_MODE (rtx_op1); ++ if (m1 == VOIDmode) ++ m1 = TYPE_MODE (TREE_TYPE (t_op1)); -@@ -25660,12 +25524,12 @@ arm_print_tune_info (void) - current_tune->constant_limit); - asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n", - current_tune->max_insns_skipped); -- asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n", -- current_tune->num_prefetch_slots); -- asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n", -- current_tune->l1_cache_size); -- asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n", -- current_tune->l1_cache_line_size); -+ asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n", -+ current_tune->prefetch.num_slots); -+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n", -+ current_tune->prefetch.l1_cache_size); -+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n", -+ current_tune->prefetch.l1_cache_line_size); - asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n", - (int) current_tune->prefer_constant_pool); - asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n"); -@@ -25681,17 +25545,13 @@ arm_print_tune_info (void) - asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n", - (int) current_tune->prefer_ldrd_strd); - asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n", -- (int) current_tune->logical_op_non_short_circuit[0], -- (int) current_tune->logical_op_non_short_circuit[1]); -+ (int) current_tune->logical_op_non_short_circuit_thumb, -+ (int) current_tune->logical_op_non_short_circuit_arm); - asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n", - (int) current_tune->prefer_neon_for_64bits); - asm_fprintf (asm_out_file, - "\t\t@disparage_flag_setting_t16_encodings:\t%d\n", - (int) current_tune->disparage_flag_setting_t16_encodings); -- asm_fprintf (asm_out_file, -- "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n", -- (int) current_tune -- ->disparage_partial_flag_setting_t16_encodings); - asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n", - (int) current_tune->string_ops_prefer_neon); - asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n", -@@ -27213,40 +27073,12 @@ thumb2_output_casesi (rtx *operands) +- create_input_operand (&ops[0], rtx_op0, GET_MODE (rtx_op0)); +- create_input_operand (&ops[1], rtx_op1, GET_MODE (rtx_op1)); ++ create_input_operand (&ops[0], rtx_op0, m0); ++ create_input_operand (&ops[1], rtx_op1, m1); + if (!maybe_legitimize_operands (icode, 4, 2, ops)) + gcc_unreachable (); + return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value); +--- a/src/gcc/params.def ++++ b/src/gcc/params.def +@@ -262,6 +262,14 @@ DEFPARAM(PARAM_MAX_HOIST_DEPTH, + "Maximum depth of search in the dominator tree for expressions to hoist", + 30, 0, 0) + ++ ++/* When synthesizing expnonentiation by a real constant operations using square ++ roots, this controls how deep sqrt chains we are willing to generate. */ ++DEFPARAM(PARAM_MAX_POW_SQRT_DEPTH, ++ "max-pow-sqrt-depth", ++ "Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant", ++ 5, 1, 32) ++ + /* This parameter limits the number of insns in a loop that will be unrolled, + and by how much the loop is unrolled. 
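As an aside on the new --param max-pow-sqrt-depth above (an illustrative
sketch, not part of the patch): the parameter bounds how long a chain of
square roots the pow-synthesis code may build when expanding __builtin_pow
with a constant real exponent under fast-math style flags.  An exponent of
0.75 only needs a chain of depth 2, since 0.75 = 1/2 + 1/4; the function
name below is made up for illustration.

    #include <math.h>

    /* Sketch of the expansion shape for __builtin_pow (x, 0.75):
       two chained square roots, then one multiply.  */
    double
    pow_three_quarters (double x)
    {
      double s = sqrt (x);   /* x ** 0.5  */
      double q = sqrt (s);   /* x ** 0.25 */
      return s * q;          /* x ** 0.75 */
    }

The gcc.dg/pow-sqrt-*.c tests added by this patch exercise the expansion
with exponents such as -5.875 and 1.25 and compare the result against a
real __builtin_pow call.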
+ +--- a/src/gcc/regrename.c ++++ b/src/gcc/regrename.c +@@ -505,12 +505,20 @@ rename_chains (void) + continue; + } + +- if (dump_file) +- fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); +- +- regrename_do_replace (this_head, best_new_reg); +- tick[best_new_reg] = ++this_tick; +- df_set_regs_ever_live (best_new_reg, true); ++ if (regrename_do_replace (this_head, best_new_reg)) ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); ++ tick[best_new_reg] = ++this_tick; ++ df_set_regs_ever_live (best_new_reg, true); ++ } ++ else ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renaming as %s failed\n", ++ reg_names[best_new_reg]); ++ tick[reg] = ++this_tick; ++ } } } --/* Most ARM cores are single issue, but some newer ones can dual issue. -- The scheduler descriptions rely on this being correct. */ -+/* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the -+ per-core tuning structs. */ - static int - arm_issue_rate (void) +@@ -936,7 +944,13 @@ regrename_analyze (bitmap bb_mask) + bb->aux = NULL; + } + +-void ++/* Attempt to replace all uses of the register in the chain beginning with ++ HEAD with REG. Returns true on success and false if the replacement is ++ rejected because the insns would not validate. The latter can happen ++ e.g. if a match_parallel predicate enforces restrictions on register ++ numbering in its subpatterns. */ ++ ++bool + regrename_do_replace (struct du_head *head, int reg) { -- switch (arm_tune) -- { -- case xgene1: -- return 4; -- -- case cortexa15: -- case cortexa57: -- case exynosm1: -- return 3; -- -- case cortexm7: -- case cortexr4: -- case cortexr4f: -- case cortexr5: -- case genericv7a: -- case cortexa5: -- case cortexa7: -- case cortexa8: -- case cortexa9: -- case cortexa12: -- case cortexa17: -- case cortexa53: -- case fa726te: -- case marvell_pj4: -- return 2; + struct du_chain *chain; +@@ -950,22 +964,26 @@ regrename_do_replace (struct du_head *head, int reg) + int reg_ptr = REG_POINTER (*chain->loc); + + if (DEBUG_INSN_P (chain->insn) && REGNO (*chain->loc) != base_regno) +- INSN_VAR_LOCATION_LOC (chain->insn) = gen_rtx_UNKNOWN_VAR_LOC (); ++ validate_change (chain->insn, &(INSN_VAR_LOCATION_LOC (chain->insn)), ++ gen_rtx_UNKNOWN_VAR_LOC (), true); + else + { +- *chain->loc = gen_raw_REG (GET_MODE (*chain->loc), reg); ++ validate_change (chain->insn, chain->loc, ++ gen_raw_REG (GET_MODE (*chain->loc), reg), true); + if (regno >= FIRST_PSEUDO_REGISTER) + ORIGINAL_REGNO (*chain->loc) = regno; + REG_ATTRS (*chain->loc) = attr; + REG_POINTER (*chain->loc) = reg_ptr; + } - -- default: -- return 1; -- } -+ return current_tune->issue_rate; +- df_insn_rescan (chain->insn); + } + ++ if (!apply_change_group ()) ++ return false; ++ + mode = GET_MODE (*head->first->loc); + head->regno = reg; + head->nregs = hard_regno_nregs[reg][mode]; ++ return true; } - /* Return how many instructions should scheduler lookahead to choose the -@@ -29411,7 +29243,7 @@ arm_gen_setmem (rtx *operands) - static bool - arm_macro_fusion_p (void) - { -- return current_tune->fuseable_ops != ARM_FUSE_NOTHING; -+ return current_tune->fuseable_ops != tune_params::FUSE_NOTHING; + +--- a/src/gcc/regrename.h ++++ b/src/gcc/regrename.h +@@ -91,6 +91,6 @@ extern void regrename_analyze (bitmap); + extern du_head_p regrename_chain_from_id (unsigned int); + extern int find_rename_reg (du_head_p, enum reg_class, HARD_REG_SET *, int, + bool); +-extern void regrename_do_replace (du_head_p, int); ++extern bool regrename_do_replace 
(du_head_p, int); + + #endif +--- a/src/gcc/rtlanal.c ++++ b/src/gcc/rtlanal.c +@@ -104,7 +104,10 @@ generic_subrtx_iterator ::add_single_to_queue (array_type &array, + return base; + } + gcc_checking_assert (i == LOCAL_ELEMS); +- vec_safe_grow (array.heap, i + 1); ++ /* A previous iteration might also have moved from the stack to the ++ heap, in which case the heap array will already be big enough. */ ++ if (vec_safe_length (array.heap) <= i) ++ vec_safe_grow (array.heap, i + 1); + base = array.heap->address (); + memcpy (base, array.stack, sizeof (array.stack)); + base[LOCAL_ELEMS] = x; +--- a/src/gcc/sched-deps.c ++++ b/src/gcc/sched-deps.c +@@ -2856,7 +2856,7 @@ sched_analyze_2 (struct deps_desc *deps, rtx x, rtx_insn *insn) + sched_deps_info->finish_rhs (); } +-/* Try to group two fuseable insns together to prevent scheduler ++/* Try to group two fusible insns together to prevent scheduler + from scheduling them apart. */ -@@ -29432,44 +29264,44 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) - if (!arm_macro_fusion_p ()) - return false; + static void +--- a/src/gcc/simplify-rtx.c ++++ b/src/gcc/simplify-rtx.c +@@ -978,6 +978,32 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + if (GET_CODE (op) == NEG) + return XEXP (op, 0); + ++ /* (neg (x ? (neg y) : y)) == !x ? (neg y) : y. ++ If comparison is not reversible use ++ x ? y : (neg y). */ ++ if (GET_CODE (op) == IF_THEN_ELSE) ++ { ++ rtx cond = XEXP (op, 0); ++ rtx true_rtx = XEXP (op, 1); ++ rtx false_rtx = XEXP (op, 2); ++ ++ if ((GET_CODE (true_rtx) == NEG ++ && rtx_equal_p (XEXP (true_rtx, 0), false_rtx)) ++ || (GET_CODE (false_rtx) == NEG ++ && rtx_equal_p (XEXP (false_rtx, 0), true_rtx))) ++ { ++ if (reversed_comparison_code (cond, NULL_RTX) != UNKNOWN) ++ temp = reversed_comparison (cond, mode); ++ else ++ { ++ temp = cond; ++ std::swap (true_rtx, false_rtx); ++ } ++ return simplify_gen_ternary (IF_THEN_ELSE, mode, ++ mode, temp, true_rtx, false_rtx); ++ } ++ } ++ + /* (neg (plus X 1)) can become (not X). */ + if (GET_CODE (op) == PLUS + && XEXP (op, 1) == const1_rtx) +@@ -1171,7 +1197,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + = (float_truncate:SF foo:DF). -- if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT) -+ if (current_tune->fuseable_ops & tune_params::FUSE_MOVW_MOVT) - { - /* We are trying to fuse -- movw imm / movt imm -- instructions as a group that gets scheduled together. */ -+ movw imm / movt imm -+ instructions as a group that gets scheduled together. */ + (float_truncate:DF (float_extend:XF foo:SF)) +- = (float_extend:SF foo:DF). */ ++ = (float_extend:DF foo:SF). 
*/ + if ((GET_CODE (op) == FLOAT_TRUNCATE + && flag_unsafe_math_optimizations) + || GET_CODE (op) == FLOAT_EXTEND) +@@ -1183,14 +1209,14 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + XEXP (op, 0), mode); - set_dest = SET_DEST (curr_set); + /* (float_truncate (float x)) is (float x) */ +- if (GET_CODE (op) == FLOAT ++ if ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) + && (flag_unsafe_math_optimizations + || (SCALAR_FLOAT_MODE_P (GET_MODE (op)) + && ((unsigned)significand_size (GET_MODE (op)) + >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) + - num_sign_bit_copies (XEXP (op, 0), + GET_MODE (XEXP (op, 0)))))))) +- return simplify_gen_unary (FLOAT, mode, ++ return simplify_gen_unary (GET_CODE (op), mode, + XEXP (op, 0), + GET_MODE (XEXP (op, 0))); - if (GET_MODE (set_dest) != SImode) -- return false; -+ return false; +@@ -1221,7 +1247,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + rounding can't happen. + */ + if (GET_CODE (op) == FLOAT_EXTEND +- || (GET_CODE (op) == FLOAT ++ || ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) + && SCALAR_FLOAT_MODE_P (GET_MODE (op)) + && ((unsigned)significand_size (GET_MODE (op)) + >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) +--- a/src/gcc/stmt.c ++++ b/src/gcc/stmt.c +@@ -342,13 +342,7 @@ parse_output_constraint (const char **constraint_p, int operand_num, + else if (insn_extra_memory_constraint (cn)) + *allows_mem = true; + else +- { +- /* Otherwise we can't assume anything about the nature of +- the constraint except that it isn't purely registers. +- Treat it like "g" and hope for the best. */ +- *allows_reg = true; +- *allows_mem = true; +- } ++ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); + break; + } - /* We are trying to match: -- prev (movw) == (set (reg r0) (const_int imm16)) -- curr (movt) == (set (zero_extract (reg r0) -- (const_int 16) -- (const_int 16)) -- (const_int imm16_1)) -- or -- prev (movw) == (set (reg r1) -- (high (symbol_ref ("SYM")))) -- curr (movt) == (set (reg r0) -- (lo_sum (reg r1) -- (symbol_ref ("SYM")))) */ -+ prev (movw) == (set (reg r0) (const_int imm16)) -+ curr (movt) == (set (zero_extract (reg r0) -+ (const_int 16) -+ (const_int 16)) -+ (const_int imm16_1)) -+ or -+ prev (movw) == (set (reg r1) -+ (high (symbol_ref ("SYM")))) -+ curr (movt) == (set (reg r0) -+ (lo_sum (reg r1) -+ (symbol_ref ("SYM")))) */ - if (GET_CODE (set_dest) == ZERO_EXTRACT) -- { -- if (CONST_INT_P (SET_SRC (curr_set)) -- && CONST_INT_P (SET_SRC (prev_set)) -- && REG_P (XEXP (set_dest, 0)) -- && REG_P (SET_DEST (prev_set)) -- && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) -- return true; -- } -+ { -+ if (CONST_INT_P (SET_SRC (curr_set)) -+ && CONST_INT_P (SET_SRC (prev_set)) -+ && REG_P (XEXP (set_dest, 0)) -+ && REG_P (SET_DEST (prev_set)) -+ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) -+ return true; -+ } - else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM -- && REG_P (SET_DEST (curr_set)) -- && REG_P (SET_DEST (prev_set)) -- && GET_CODE (SET_SRC (prev_set)) == HIGH -- && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) -- return true; -+ && REG_P (SET_DEST (curr_set)) -+ && REG_P (SET_DEST (prev_set)) -+ && GET_CODE (SET_SRC (prev_set)) == HIGH -+ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) -+ return true; - } - return false; - } ---- a/src/gcc/config/arm/arm.h -+++ b/src/gcc/config/arm/arm.h -@@ -1360,46 +1360,6 @@ enum reg_class - ? 
GENERAL_REGS : NO_REGS) \ - : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X))) +@@ -465,13 +459,7 @@ parse_input_constraint (const char **constraint_p, int input_num, + else if (insn_extra_memory_constraint (cn)) + *allows_mem = true; + else +- { +- /* Otherwise we can't assume anything about the nature of +- the constraint except that it isn't purely registers. +- Treat it like "g" and hope for the best. */ +- *allows_reg = true; +- *allows_mem = true; +- } ++ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); + break; + } --/* Try a machine-dependent way of reloading an illegitimate address -- operand. If we find one, push the reload and jump to WIN. This -- macro is used in only one place: `find_reloads_address' in reload.c. -- -- For the ARM, we wish to handle large displacements off a base -- register by splitting the addend across a MOV and the mem insn. -- This can cut the number of reloads needed. */ --#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \ -- do \ -- { \ -- if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \ -- goto WIN; \ -- } \ -- while (0) -- --/* XXX If an HImode FP+large_offset address is converted to an HImode -- SP+large_offset address, then reload won't know how to fix it. It sees -- only that SP isn't valid for HImode, and so reloads the SP into an index -- register, but the resulting address is still invalid because the offset -- is too big. We fix it here instead by reloading the entire address. */ --/* We could probably achieve better results by defining PROMOTE_MODE to help -- cope with the variances between the Thumb's signed and unsigned byte and -- halfword load instructions. */ --/* ??? This should be safe for thumb2, but we may be able to do better. */ --#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ --do { \ -- rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \ -- if (new_x) \ -- { \ -- X = new_x; \ -- goto WIN; \ -- } \ --} while (0) -- --#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ -- if (TARGET_ARM) \ -- ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \ -- else \ -- THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) -- - /* Return the maximum number of consecutive registers - needed to represent mode MODE in a register of class CLASS. - ARM regs are UNITS_PER_WORD bits. -@@ -2096,10 +2056,11 @@ enum arm_auto_incmodes - (current_tune->branch_cost (speed_p, predictable_p)) +--- a/src/gcc/target.def ++++ b/src/gcc/target.def +@@ -1975,7 +1975,7 @@ merging.", + DEFHOOKPOD + (attribute_table, + "If defined, this target hook points to an array of @samp{struct\n\ +-attribute_spec} (defined in @file{tree.h}) specifying the machine\n\ ++attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\ + specific attributes for this target and some of the restrictions on the\n\ + entities to which these attributes are applied and the arguments they\n\ + take.", +--- a/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess1.c ++++ b/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess1.c +@@ -1,6 +1,7 @@ + /* Test -Wsizeof-pointer-memaccess warnings. */ + /* { dg-do compile } */ + /* { dg-options "-Wall -Wno-sizeof-array-argument" } */ ++/* { dg-options "-Wall -Wno-sizeof-array-argument -Wno-c++-compat" { target c } } */ - /* False if short circuit operation is preferred. */ --#define LOGICAL_OP_NON_SHORT_CIRCUIT \ -- ((optimize_size) \ -- ? (TARGET_THUMB ? 
false : true) \ -- : (current_tune->logical_op_non_short_circuit[TARGET_ARM])) -+#define LOGICAL_OP_NON_SHORT_CIRCUIT \ -+ ((optimize_size) \ -+ ? (TARGET_THUMB ? false : true) \ -+ : TARGET_THUMB ? static_cast (current_tune->logical_op_non_short_circuit_thumb) \ -+ : static_cast (current_tune->logical_op_non_short_circuit_arm)) + typedef __SIZE_TYPE__ size_t; + #ifdef __cplusplus +--- a/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c ++++ b/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c +@@ -1,6 +1,7 @@ + /* Test -Wsizeof-pointer-memaccess warnings. */ + /* { dg-do compile } */ + /* { dg-options "-Wall -O2 -Wno-sizeof-array-argument" } */ ++/* { dg-options "-Wall -O2 -Wno-sizeof-array-argument -Wno-c++-compat" {target c} } */ - - /* Position Independent Code. */ ---- a/src/gcc/config/arm/arm.md -+++ b/src/gcc/config/arm/arm.md -@@ -1177,9 +1177,9 @@ + #define bos(ptr) __builtin_object_size (ptr, 1) + #define bos0(ptr) __builtin_object_size (ptr, 0) +--- a/src/gcc/testsuite/c-c++-common/pr58346-1.c ++++ b/src/gcc/testsuite/c-c++-common/pr58346-1.c +@@ -1,5 +1,6 @@ + /* PR c/58346 */ + /* { dg-do compile } */ ++/* { dg-options "-Wno-c++-compat" { target c } } */ - ; ??? Check Thumb-2 split length - (define_insn_and_split "*arm_subsi3_insn" -- [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r") -- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n") -- (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))] -+ [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r,r,r,rk,r") -+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,I,r,r,k ,?n") -+ (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r,I,r,r ,r")))] - "TARGET_32BIT" - "@ - sub%?\\t%0, %1, %2 -@@ -2768,6 +2768,55 @@ - (const_string "logic_shift_reg")))] - ) + struct U { + #ifdef __cplusplus +--- a/src/gcc/testsuite/c-c++-common/transparent-union-1.c ++++ b/src/gcc/testsuite/c-c++-common/transparent-union-1.c +@@ -1,4 +1,5 @@ + /* PR c++/51228 */ ++/* { dg-options "-Wno-c++-compat" { target c } } */ -+;; Shifted bics pattern used to set up CC status register and not reusing -+;; bics output. Pattern restricts Thumb2 shift operand as bics for Thumb2 -+;; does not support shift by register. 
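As an illustration of the kind of source the new bics patterns below are
meant to catch (a sketch, not part of the patch; the function name is made
up):

    /* A masked test whose mask operand is shifted.  On ARM this can be
       emitted as a single flag-setting BICS instruction instead of a
       separate bit-clear and compare, when combine forms the matching
       RTL.  */
    int
    all_masked_bits_clear (unsigned int a, unsigned int b)
    {
      return (a & ~(b << 3)) == 0;
    }

As the comment above notes, Thumb-2 has no register-shifted variant here,
which is why the insn condition below checks CONST_INT_P (operands[2]).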
-+(define_insn "andsi_not_shiftsi_si_scc_no_reuse" -+ [(set (reg:CC_NOOV CC_REGNUM) -+ (compare:CC_NOOV -+ (and:SI (not:SI (match_operator:SI 0 "shift_operator" -+ [(match_operand:SI 1 "s_register_operand" "r") -+ (match_operand:SI 2 "arm_rhs_operand" "rM")])) -+ (match_operand:SI 3 "s_register_operand" "r")) -+ (const_int 0))) -+ (clobber (match_scratch:SI 4 "=r"))] -+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" -+ "bic%.%?\\t%4, %3, %1%S0" -+ [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "conds" "set") -+ (set_attr "shift" "1") -+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "logic_shift_imm") -+ (const_string "logic_shift_reg")))] -+) + typedef union {} U __attribute__((transparent_union)); /* { dg-warning "ignored" } */ + +--- a/src/gcc/testsuite/g++.dg/ext/pr57735.C ++++ b/src/gcc/testsuite/g++.dg/ext/pr57735.C +@@ -1,4 +1,7 @@ + /* { dg-do compile { target arm*-*-* } } */ ++/* { dg-require-effective-target arm_arch_v5te_ok } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } {"-mfloat-abi=soft" } } */ + /* { dg-options "-march=armv5te -marm -mtune=xscale -mfloat-abi=soft -O1" } */ + + typedef unsigned int size_t; +--- a/src//dev/null ++++ b/src/gcc/testsuite/g++.dg/tree-ssa/pr66726.c +@@ -0,0 +1,36 @@ ++ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++/* Execution test for converting VIEW_CONVERT_EXPR. */ ++ ++struct cpp_num { ++ bool f; ++}; ++ ++extern cpp_num __attribute__((noinline)) ++foo (cpp_num lhs, ++ cpp_num rhs) ++{ ++ lhs.f = lhs.f || rhs.f; ++ return lhs; ++} ++ ++cpp_num lhs, rhs, r; ++ ++int main () ++{ ++ ++ lhs.f = false; ++ rhs.f = false; ++ r = foo (lhs, rhs); ++ if (r.f) ++ __builtin_abort (); ++ ++ ++ lhs.f = false; ++ rhs.f = true; ++ r = foo (lhs, rhs); ++ if (!r.f) ++ __builtin_abort (); ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.c-torture/compile/pr66168.c +@@ -0,0 +1,15 @@ ++int a, b; ++ ++void ++fn1 () ++{ ++ for (;;) ++ { ++ for (b = 0; b < 3; b++) ++ { ++ char e[2]; ++ char f = e[1]; ++ a ^= f ? 
1 / f : 0; ++ } ++ } ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.c-torture/execute/pr65648.c +@@ -0,0 +1,34 @@ ++/* PR target/65648 */ ++ ++int a = 0, *b = 0, c = 0; ++static int d = 0; ++short e = 1; ++static long long f = 0; ++long long *i = &f; ++unsigned char j = 0; ++ ++__attribute__((noinline, noclone)) void ++foo (int x, int *y) ++{ ++ asm volatile ("" : : "r" (x), "r" (y) : "memory"); ++} ++ ++__attribute__((noinline, noclone)) void ++bar (const char *x, long long y) ++{ ++ asm volatile ("" : : "r" (x), "r" (&y) : "memory"); ++ if (y != 0) ++ __builtin_abort (); ++} ++ ++int ++main () ++{ ++ int k = 0; ++ b = &k; ++ j = (!a) - (c <= e); ++ *i = j; ++ foo (a, &k); ++ bar ("", f); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/Wcxx-compat-22.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Wc++-compat" } */ ++struct A {}; /* { dg-warning "empty struct has size 0 in C" } */ ++union B {}; /* { dg-warning "empty union has size 0 in C" } */ ++struct C { struct D {}; int x; }; /* { dg-warning "empty struct has size 0 in C|declaration does not declare anything" } */ ++struct E { union F {}; int x; }; /* { dg-warning "empty union has size 0 in C|declaration does not declare anything" } */ ++union G { union H {}; int x; }; /* { dg-warning "empty union has size 0 in C|declaration does not declare anything" } */ ++union I { struct J {}; int x; }; /* { dg-warning "empty struct has size 0 in C|declaration does not declare anything" } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/dx-test.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-dx" } */ ++ ++void f(void) ++{} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/loop-8.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ ++void ++f (int *a, int *b) ++{ ++ int i; ++ ++ for (i = 0; i < 100; i++) ++ { ++ int d = 42; ++ ++ a[i] = d; ++ if (i % 2) ++ d = i; ++ b[i] = d; ++ } ++} ++ ++/* Load of 42 is moved out of the loop, introducing a new pseudo register. */ ++/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */ ++/* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */ ++/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/loop-9.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ ++void ++f (double *a) ++{ ++ int i; ++ for (i = 0; i < 100; i++) ++ a[i] = 18.4242; ++} ++ ++/* Load of x is moved out of the loop. */ ++/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */ ++/* { dg-final { scan-rtl-dump "without introducing a new temporary register" "loop2_invariant" } } */ ++/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/loop-invariant.c +@@ -0,0 +1,43 @@ ++/* { dg-do compile { target x86_64-*-* } } */ ++/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */ ++/* NOTE: The target list above could be extended to other targets that have ++ conditional moves, but don't have zero registers. 
*/ ++ ++enum test_type ++{ ++ TYPE0, ++ TYPE1 ++}; ++ ++struct type_node ++{ ++ enum test_type type; ++}; ++ ++struct test_ref ++{ ++ struct type_node *referring; ++}; ++ ++struct test_node ++{ ++ struct test_node *next; ++}; ++ ++int iterate (struct test_node *, unsigned, struct test_ref **); ++ ++int ++loop_invar (struct test_node *node) ++{ ++ struct test_ref *ref; ++ ++ for (unsigned i = 0; iterate (node, i, &ref); i++) ++ if (loop_invar ((ref->referring && ref->referring->type == TYPE0) ++ ? ((struct test_node *) (ref->referring)) : 0)) ++ return 1; ++ ++ return 0; ++} ++ ++/* { dg-final { scan-rtl-dump "Decided to move invariant" "loop2_invariant" } } */ ++/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-1.c +@@ -0,0 +1,6 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++ ++#define EXPN (-6 * (0.5*0.5*0.5*0.5)) ++ ++#include "pow-sqrt.x" +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-2.c +@@ -0,0 +1,5 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++ ++#define EXPN (-5.875) ++#include "pow-sqrt.x" +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-3.c +@@ -0,0 +1,5 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=3" } */ ++ ++#define EXPN (1.25) ++#include "pow-sqrt.x" +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile { target sqrt_insn } } */ ++/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ ++/* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target arm*-*-* } } */ ++ ++double ++foo (double a) ++{ ++ return __builtin_pow (a, -5.875); ++} ++ ++double ++foof (double a) ++{ ++ return __builtin_pow (a, 0.75f); ++} ++ ++double ++bar (double a) ++{ ++ return __builtin_pow (a, 1.0 + 0.00390625); ++} ++ ++double ++baz (double a) ++{ ++ return __builtin_pow (a, -1.25) + __builtin_pow (a, 5.75) - __builtin_pow (a, 3.375); ++} ++ ++#define N 256 ++void ++vecfoo (double *a) ++{ ++ for (int i = 0; i < N; i++) ++ a[i] = __builtin_pow (a[i], 1.25); ++} ++ ++/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ ++/* { dg-final { cleanup-tree-dump "sincos" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt.x +@@ -0,0 +1,30 @@ ++ ++extern void abort (void); ++ ++ ++__attribute__((noinline)) double ++real_pow (double x, double pow_exp) ++{ ++ return __builtin_pow (x, pow_exp); ++} ++ ++#define EPS (0.000000000000000000001) ++ ++#define SYNTH_POW(X, Y) __builtin_pow (X, Y) ++volatile double arg; ++ ++int ++main (void) ++{ ++ double i_arg = 0.1; ++ ++ for (arg = i_arg; arg < 100.0; arg += 1.0) ++ { ++ double synth_res = SYNTH_POW (arg, EXPN); ++ double real_res = real_pow (arg, EXPN); ++ ++ if (__builtin_abs (SYNTH_POW (arg, EXPN) - real_pow (arg, EXPN)) > EPS) ++ abort (); ++ } ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pr49551.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fdata-sections" } */ + -+;; Same as andsi_not_shiftsi_si_scc_no_reuse, but the bics result is also -+;; getting reused later. 
-+(define_insn "andsi_not_shiftsi_si_scc" -+ [(parallel [(set (reg:CC_NOOV CC_REGNUM) -+ (compare:CC_NOOV -+ (and:SI (not:SI (match_operator:SI 0 "shift_operator" -+ [(match_operand:SI 1 "s_register_operand" "r") -+ (match_operand:SI 2 "arm_rhs_operand" "rM")])) -+ (match_operand:SI 3 "s_register_operand" "r")) -+ (const_int 0))) -+ (set (match_operand:SI 4 "s_register_operand" "=r") -+ (and:SI (not:SI (match_op_dup 0 -+ [(match_dup 1) -+ (match_dup 2)])) -+ (match_dup 3)))])] -+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" -+ "bic%.%?\\t%4, %3, %1%S0" -+ [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "conds" "set") -+ (set_attr "shift" "1") -+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "logic_shift_imm") -+ (const_string "logic_shift_reg")))] -+) ++int x = 1; ++int x; + - (define_insn "*andsi_notsi_si_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV -@@ -5076,7 +5125,7 @@ - - (define_split - [(set (match_operand:SI 0 "s_register_operand" "") -- (ior_xor:SI (and:SI (ashift:SI -+ (IOR_XOR:SI (and:SI (ashift:SI - (match_operand:SI 1 "s_register_operand" "") - (match_operand:SI 2 "const_int_operand" "")) - (match_operand:SI 3 "const_int_operand" "")) -@@ -5088,7 +5137,7 @@ - == (GET_MODE_MASK (GET_MODE (operands[5])) - & (GET_MODE_MASK (GET_MODE (operands[5])) - << (INTVAL (operands[2])))))" -- [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) -+ [(set (match_dup 0) (IOR_XOR:SI (ashift:SI (match_dup 1) (match_dup 2)) - (match_dup 4))) - (set (match_dup 0) (zero_extend:SI (match_dup 5)))] - "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" -@@ -5667,7 +5716,7 @@ - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "length" "4") -- (set_attr "type" "mov_imm")] -+ (set_attr "type" "alu_sreg")] - ) - - (define_insn "*arm_movsi_insn" -@@ -6712,7 +6761,7 @@ - - /* Support only fixed point registers. */ - if (!CONST_INT_P (operands[2]) -- || INTVAL (operands[2]) > 14 -+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS - || INTVAL (operands[2]) < 2 - || !MEM_P (operands[1]) - || !REG_P (operands[0]) -@@ -6737,7 +6786,7 @@ - - /* Support only fixed point registers. 
*/ - if (!CONST_INT_P (operands[2]) -- || INTVAL (operands[2]) > 14 -+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS - || INTVAL (operands[2]) < 2 - || !REG_P (operands[1]) - || !MEM_P (operands[0]) -@@ -6922,7 +6971,7 @@ - [(set_attr "conds" "set") - (set_attr "shift" "1") - (set_attr "arch" "32,a,a") -- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) -+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) - - (define_insn "*cmpsi_shiftsi_swp" - [(set (reg:CC_SWP CC_REGNUM) -@@ -6935,7 +6984,7 @@ - [(set_attr "conds" "set") - (set_attr "shift" "1") - (set_attr "arch" "32,a,a") -- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) -+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) - - (define_insn "*arm_cmpsi_negshiftsi_si" - [(set (reg:CC_Z CC_REGNUM) -@@ -7528,10 +7577,10 @@ - (const_string "mov_imm") - (const_string "mov_reg")) - (const_string "mvn_imm") -- (const_string "mov_reg") -- (const_string "mov_reg") -- (const_string "mov_reg") -- (const_string "mov_reg")])] -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*movsfcc_soft_insn" -@@ -7884,7 +7933,7 @@ - ) - - (define_expand "return" -- [(returns)] -+ [(RETURNS)] - "(TARGET_ARM || (TARGET_THUMB2 - && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL - && !IS_STACKALIGN (arm_current_func_type ()))) -@@ -7922,7 +7971,7 @@ - [(set (pc) - (if_then_else (match_operator 0 "arm_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) -- (returns) -+ (RETURNS) - (pc)))] - "TARGET_ARM " - "* -@@ -7945,7 +7994,7 @@ - (if_then_else (match_operator 0 "arm_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) - (pc) -- (returns)))] -+ (RETURNS)))] - "TARGET_ARM " - "* - { -@@ -8279,7 +8328,7 @@ - - (define_insn "*_multsi" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") -- (shiftable_ops:SI -+ (SHIFTABLE_OPS:SI - (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") - (match_operand:SI 3 "power_of_two_operand" "")) - (match_operand:SI 1 "s_register_operand" "rk,")))] -@@ -8293,7 +8342,7 @@ - - (define_insn "*_shiftsi" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") -- (shiftable_ops:SI -+ (SHIFTABLE_OPS:SI - (match_operator:SI 2 "shift_nomul_operator" - [(match_operand:SI 3 "s_register_operand" "r,r,r") - (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) -@@ -8689,7 +8738,14 @@ - return \"\"; - " - [(set_attr "conds" "use") -- (set_attr "type" "mov_reg,mov_reg,multiple") -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "multiple")]) - (set_attr "length" "4,4,8")] - ) - -@@ -9485,8 +9541,8 @@ - (const_string "alu_imm" ) - (const_string "alu_sreg")) - (const_string "alu_imm") -- (const_string "alu_sreg") -- (const_string "alu_sreg")])] -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_plus" -@@ -9523,7 +9579,13 @@ - sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,4,8,8") -- (set_attr "type" "alu_sreg,alu_imm,multiple,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "alu_imm" ) -+ (const_string "alu_sreg")) -+ (const_string "alu_imm") -+ 
(const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_arith_arith" -@@ -9618,7 +9680,11 @@ - %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,8") -- (set_attr "type" "alu_shift_reg,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "alu_shift_imm" ) -+ (const_string "alu_shift_reg")) -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_arith" -@@ -9679,7 +9745,11 @@ - %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,8") -- (set_attr "type" "alu_shift_reg,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "alu_shift_imm" ) -+ (const_string "alu_shift_reg")) -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_not" -@@ -9786,7 +9856,12 @@ - [(set_attr "conds" "use") - (set_attr "shift" "2") - (set_attr "length" "4,8,8") -- (set_attr "type" "mov_shift_reg,multiple,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "mov_shift" ) -+ (const_string "mov_shift_reg")) -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_shift" -@@ -9824,7 +9899,12 @@ - [(set_attr "conds" "use") - (set_attr "shift" "2") - (set_attr "length" "4,8,8") -- (set_attr "type" "mov_shift_reg,multiple,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "mov_shift" ) -+ (const_string "mov_shift_reg")) -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_shift_shift" -@@ -10905,7 +10985,7 @@ - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "length" "4") -- (set_attr "type" "mov_imm")] -+ (set_attr "type" "alu_sreg")] - ) - - (define_insn "*arm_rev" ---- a/src/gcc/config/arm/iterators.md -+++ b/src/gcc/config/arm/iterators.md -@@ -181,39 +181,53 @@ - ;; compare a second time. 
- (define_code_iterator LTUGEU [ltu geu]) - -+;; The signed gt, ge comparisons -+(define_code_iterator GTGE [gt ge]) ++/* { dg-final { scan-assembler-not {comm[\t ]+x} } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pr67043.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fcompare-debug -w" } */ ++ ++extern void rt_mutex_owner (void); ++extern void rt_mutex_deadlock_account_lock (int); ++extern void signal_pending (void); ++__typeof__ (int *) a; ++int b; ++ ++int ++try_to_take_rt_mutex (int p1) { ++ rt_mutex_owner (); ++ if (b) ++ return 0; ++ rt_mutex_deadlock_account_lock (p1); ++ return 1; ++} ++ ++void ++__rt_mutex_slowlock (int p1) { ++ int c; ++ for (;;) { ++ c = ({ ++ asm ("" : "=r"(a)); ++ a; ++ }); ++ if (try_to_take_rt_mutex (c)) ++ break; ++ if (__builtin_expect (p1 == 0, 0)) ++ signal_pending (); ++ } ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/torture/pr66076.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "" } */ ++/* { dg-options "-mno-prefer-avx128 -march=bdver4" { target i?86-*-* x86_64-*-* } } */ + -+;; The unsigned gt, ge comparisons -+(define_code_iterator GTUGEU [gtu geu]) ++void ++f0a (char *result, char *arg1, char *arg4, char temp_6) ++{ ++ int idx = 0; ++ for (idx = 0; idx < 416; idx += 1) ++ result[idx] = (arg1[idx] + arg4[idx]) * temp_6; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr64130.c +@@ -0,0 +1,18 @@ + -+;; Comparisons for vc -+(define_code_iterator COMPARISONS [eq gt ge le lt]) ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-vrp1" } */ + - ;; A list of ... --(define_code_iterator ior_xor [ior xor]) -+(define_code_iterator IOR_XOR [ior xor]) - - ;; Operations on two halves of a quadword vector. --(define_code_iterator vqh_ops [plus smin smax umin umax]) -+(define_code_iterator VQH_OPS [plus smin smax umin umax]) - - ;; Operations on two halves of a quadword vector, - ;; without unsigned variants (for use with *SFmode pattern). --(define_code_iterator vqhs_ops [plus smin smax]) -+(define_code_iterator VQHS_OPS [plus smin smax]) - - ;; A list of widening operators - (define_code_iterator SE [sign_extend zero_extend]) ++int funsigned (unsigned a) ++{ ++ return 0x1ffffffffL / a == 0; ++} ++ ++int funsigned2 (unsigned a) ++{ ++ if (a < 1) return 1; ++ return (-1 * 0x1ffffffffL) / a == 0; ++} ++ ++/* { dg-final { scan-tree-dump ": \\\[2, 8589934591\\\]" "vrp1" } } */ ++/* { dg-final { scan-tree-dump ": \\\[-8589934591, -2\\\]" "vrp1" } } */ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c +@@ -0,0 +1,54 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ ++ ++void foo (double *p) ++{ ++ int i; ++ for (i = -20000; i < 200000; i+= 40) ++ { ++ p[i+0] = 1.0; ++ p[i+1] = 1.0; ++ p[i+2] = 1.0; ++ p[i+3] = 1.0; ++ p[i+4] = 1.0; ++ p[i+5] = 1.0; ++ p[i+6] = 1.0; ++ p[i+7] = 1.0; ++ p[i+8] = 1.0; ++ p[i+9] = 1.0; ++ p[i+10] = 1.0; ++ p[i+11] = 1.0; ++ p[i+12] = 1.0; ++ p[i+13] = 1.0; ++ p[i+14] = 1.0; ++ p[i+15] = 1.0; ++ p[i+16] = 1.0; ++ p[i+17] = 1.0; ++ p[i+18] = 1.0; ++ p[i+19] = 1.0; ++ p[i+20] = 1.0; ++ p[i+21] = 1.0; ++ p[i+22] = 1.0; ++ p[i+23] = 1.0; ++ p[i+24] = 1.0; ++ p[i+25] = 1.0; ++ p[i+26] = 1.0; ++ p[i+27] = 1.0; ++ p[i+28] = 1.0; ++ p[i+29] = 1.0; ++ p[i+30] = 1.0; ++ p[i+31] = 1.0; ++ p[i+32] = 1.0; ++ p[i+33] = 1.0; ++ p[i+34] = 1.0; ++ p[i+35] = 1.0; ++ p[i+36] = 1.0; ++ p[i+37] = 1.0; ++ p[i+38] = 1.0; ++ p[i+39] = 1.0; ++ } ++} ++ ++/* We should groups address type IV uses. 
*/ ++/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */ ++/* { dg-final { cleanup-tree-dump "ivopts" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr66726-2.c +@@ -0,0 +1,19 @@ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt1-details" } */ ++ ++extern void bar (char, char); ++int ++foo (char b) ++{ ++ char a; ++ a = b; ++ b = 'b'; ++ bar (a, b); ++ b = a; ++ if (b == 0) ++ a++; ++ return a + b; ++} ++ ++/* { dg-final { scan-tree-dump-times "factor conversion out" 0 "phiopt1" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr66726.c +@@ -0,0 +1,15 @@ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt1-details" } */ ++ ++extern unsigned short mode_size[]; ++ ++int ++oof (int mode) ++{ ++ return (64 < mode_size[mode] ? 64 : mode_size[mode]); ++} ++ ++/* { dg-final { scan-tree-dump-times "factor conversion out" 1 "phiopt1" } } */ ++/* { dg-final { scan-tree-dump-times "MIN_EXPR" 1 "phiopt1" } } */ ++ +--- a/src/gcc/testsuite/gcc.dg/vect/pr59354.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr59354.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-additional-options "-O3" } */ - ;; Right shifts --(define_code_iterator rshifts [ashiftrt lshiftrt]) -+(define_code_iterator RSHIFTS [ashiftrt lshiftrt]) + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/pr64252.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64252.c +@@ -1,6 +1,5 @@ + /* PR target/64252 */ + /* Test correctness of size 3 store groups permutation. */ +-/* { dg-do run } */ + /* { dg-additional-options "-O3" } */ + /* { dg-additional-options "-mavx" { target avx_runtime } } */ - ;; Iterator for integer conversions - (define_code_iterator FIXUORS [fix unsigned_fix]) +--- a/src/gcc/testsuite/gcc.dg/vect/pr64404.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64404.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-additional-options "--param=sccvn-max-alias-queries-per-access=1" } */ - ;; Binary operators whose second operand can be shifted. --(define_code_iterator shiftable_ops [plus minus ior xor and]) -+(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and]) + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/pr64493.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64493.c +@@ -1,5 +1,3 @@ +-/* { dg-do run } */ +- + #include "tree-vect.h" --;; plus and minus are the only shiftable_ops for which Thumb2 allows -+;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows - ;; a stack pointer opoerand. The minus operation is a candidate for an rsub - ;; and hence only plus is supported. - (define_code_attr t2_binop0 - [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) + int a, b, c, d, e, f, g, h; +--- a/src/gcc/testsuite/gcc.dg/vect/pr64495.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64495.c +@@ -1,5 +1,3 @@ +-/* { dg-do run } */ +- + #include + #include "tree-vect.h" --;; The instruction to use when a shiftable_ops has a shift operation as -+;; The instruction to use when a SHIFTABLE_OPS has a shift operation as - ;; its first operand. 
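As a reminder of what the arith_shift_insn mapping that follows is used for
(an illustrative sketch, not part of the patch; the function name is made
up): together with SHIFTABLE_OPS it lets a single pattern template cover
ALU operations where one operand is a shifted register, so code like the
following can be emitted as one instruction such as add r0, r1, r2, lsl #2.

    /* An add whose second operand is a shifted register.  */
    unsigned int
    add_scaled (unsigned int a, unsigned int b)
    {
      return a + (b << 2);
    }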
- (define_code_attr arith_shift_insn - [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) +--- a/src/gcc/testsuite/gcc.dg/vect/pr64844.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64844.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-require-effective-target vect_double } */ + /* { dg-additional-options "-ffast-math" } */ -+(define_code_attr cmp_op [(eq "eq") (gt "gt") (ge "ge") (lt "lt") (le "le") -+ (gtu "gt") (geu "ge")]) -+ -+(define_code_attr cmp_type [(eq "i") (gt "s") (ge "s") (lt "s") (le "s")]) -+ - ;;---------------------------------------------------------------------------- - ;; Int iterators - ;;---------------------------------------------------------------------------- -@@ -221,6 +235,10 @@ - (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM - UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) +--- a/src/gcc/testsuite/gcc.dg/vect/pr65518.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr65518.c +@@ -1,5 +1,3 @@ +-/* { dg-do run } */ +- + extern void abort (void); + + typedef struct giga +--- a/src/gcc/testsuite/gcc.dg/vect/vect-aggressive-1.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-aggressive-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-require-effective-target vect_condition } */ + /* { dg-require-effective-target vect_simd_clones } */ + /* { dg-additional-options "-fopenmp-simd" } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c +@@ -12,6 +12,8 @@ -+(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE]) -+ -+(define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT]) + vf2_t vf2 = (vf2_t){ 17.f, 18.f }; + vi4_t vi4 = (vi4_t){ 0xdeadbabe, 0xbabecafe, 0xcafebeef, 0xbeefdead }; ++vlf1_t vlf1 = (vlf1_t) { 17.0 }; + - (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) - - (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM -@@ -677,6 +695,11 @@ + union int128_t qword; - ]) + int *int_ptr = (int *)0xabcdef0123456789ULL; +@@ -41,4 +43,5 @@ FUNC_VAL_CHECK (11, long double, 98765432123456789.987654321L, Q0, flat) + FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64) + FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128) + FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat) ++FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat) + #endif +--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h ++++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h +@@ -10,6 +10,9 @@ typedef float vf4_t __attribute__((vector_size (16))); + /* 128-bit vector of 4 ints. */ + typedef int vi4_t __attribute__((vector_size (16))); -+(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt") -+ (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") -+ (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") -+ (UNSPEC_VCAGT "gt")]) ++/* 128-bit vector of 1 quad precision float. */ ++typedef long double vlf1_t __attribute__((vector_size (16))); + - (define_int_attr r [ - (UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r") - (UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "") -@@ -774,7 +797,7 @@ - (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) + /* signed quad-word (in an union for the convenience of initialization). 
*/ + union int128_t + { +--- a/src/gcc/testsuite/gcc.target/aarch64/abs_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/abs_1.c +@@ -7,15 +7,14 @@ extern void abort (void); + long long + abs64 (long long a) + { +- /* { dg-final { scan-assembler "eor\t" } } */ +- /* { dg-final { scan-assembler "sub\t" } } */ ++ /* { dg-final { scan-assembler "csneg\t" } } */ + return llabs (a); + } - ;; Both kinds of return insn. --(define_code_iterator returns [return simple_return]) -+(define_code_iterator RETURNS [return simple_return]) - (define_code_attr return_str [(return "") (simple_return "simple_")]) - (define_code_attr return_simple_p [(return "false") (simple_return "true")]) - (define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") ---- a/src/gcc/config/arm/iwmmxt.md -+++ b/src/gcc/config/arm/iwmmxt.md -@@ -107,8 +107,8 @@ - ) + long long + abs64_in_dreg (long long a) + { +- /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */ ++ /* { dg-final { scan-assembler "csneg\t" } } */ + register long long x asm ("d8") = a; + register long long y asm ("d9"); + asm volatile ("" : : "w" (x)); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp +@@ -27,14 +27,26 @@ load_lib gcc-dg.exp - (define_insn "*iwmmxt_arm_movdi" -- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv") -- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))] -+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv") -+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))] - "TARGET_REALLY_IWMMXT - && ( register_operand (operands[0], DImode) - || register_operand (operands[1], DImode))" ---- a/src/gcc/config/arm/linux-eabi.h -+++ b/src/gcc/config/arm/linux-eabi.h -@@ -77,6 +77,23 @@ - %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ - %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + # Initialize `dg'. + load_lib c-torture.exp +-load_lib target-supports.exp +-load_lib torture-options.exp -+/* For ARM musl currently supports four dynamic linkers: -+ - ld-musl-arm.so.1 - for the EABI-derived soft-float ABI -+ - ld-musl-armhf.so.1 - for the EABI-derived hard-float ABI -+ - ld-musl-armeb.so.1 - for the EABI-derived soft-float ABI, EB -+ - ld-musl-armebhf.so.1 - for the EABI-derived hard-float ABI, EB -+ musl does not support the legacy OABI mode. -+ All the dynamic linkers live in /lib. -+ We default to soft-float, EL. */ -+#undef MUSL_DYNAMIC_LINKER -+#if TARGET_BIG_ENDIAN_DEFAULT -+#define MUSL_DYNAMIC_LINKER_E "%{mlittle-endian:;:eb}" -+#else -+#define MUSL_DYNAMIC_LINKER_E "%{mbig-endian:eb}" -+#endif -+#define MUSL_DYNAMIC_LINKER \ -+ "/lib/ld-musl-arm" MUSL_DYNAMIC_LINKER_E "%{mfloat-abi=hard:hf}.so.1" + dg-init + +-if {[istarget arm*-*-*] +- && ![check_effective_target_arm_neon_ok]} then { +- return ++# The default action for a test is 'run'. Save current default. ++global dg-do-what-default ++set save-dg-do-what-default ${dg-do-what-default} + - /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to - use the GNU/Linux version, not the generic BPABI version. 
*/ - #undef LINK_SPEC -@@ -107,6 +124,7 @@ - - #undef ENDFILE_SPEC - #define ENDFILE_SPEC \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) ++# For ARM, make sure that we have a target compatible with NEON, and do ++# not attempt to run execution tests if the hardware doesn't support it. ++if {[istarget arm*-*-*]} then { ++ if {![check_effective_target_arm_neon_ok]} then { ++ return ++ } ++ if {![is-effective-target arm_neon_hw]} then { ++ set dg-do-what-default compile ++ } else { ++ set dg-do-what-default run ++ } ++} else { ++ set dg-do-what-default run + } - /* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we ---- a/src/gcc/config/arm/neon.md -+++ b/src/gcc/config/arm/neon.md -@@ -1114,7 +1114,7 @@ - ;; lshrdi3_neon - (define_insn_and_split "di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") -- (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") -+ (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) - (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) -@@ -1194,71 +1194,6 @@ - [(set_attr "type" "neon_add_widen")] - ) + torture-init +@@ -44,22 +56,10 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS + set additional_flags [add_options_for_arm_neon ""] --;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit --;; shift-count granularity. That's good enough for the middle-end's current --;; needs. -- --;; Note that it's not safe to perform such an operation in big-endian mode, --;; due to element-ordering issues. -- --(define_expand "vec_shr_" -- [(match_operand:VDQ 0 "s_register_operand" "") -- (match_operand:VDQ 1 "s_register_operand" "") -- (match_operand:SI 2 "const_multiple_of_8_operand" "")] -- "TARGET_NEON && !BYTES_BIG_ENDIAN" --{ -- rtx zero_reg; -- HOST_WIDE_INT num_bits = INTVAL (operands[2]); -- const int width = GET_MODE_BITSIZE (mode); -- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; -- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = -- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; -- -- if (num_bits == width) -- { -- emit_move_insn (operands[0], operands[1]); -- DONE; -- } -- -- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); -- operands[0] = gen_lowpart (bvecmode, operands[0]); -- operands[1] = gen_lowpart (bvecmode, operands[1]); -- -- emit_insn (gen_ext (operands[0], operands[1], zero_reg, -- GEN_INT (num_bits / BITS_PER_UNIT))); -- DONE; --}) -- --(define_expand "vec_shl_" -- [(match_operand:VDQ 0 "s_register_operand" "") -- (match_operand:VDQ 1 "s_register_operand" "") -- (match_operand:SI 2 "const_multiple_of_8_operand" "")] -- "TARGET_NEON && !BYTES_BIG_ENDIAN" --{ -- rtx zero_reg; -- HOST_WIDE_INT num_bits = INTVAL (operands[2]); -- const int width = GET_MODE_BITSIZE (mode); -- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; -- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = -- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; -- -- if (num_bits == 0) -- { -- emit_move_insn (operands[0], CONST0_RTX (mode)); -- DONE; + # Main loop. +-foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] { +- # If we're only testing specific files and this isn't one of them, skip it. 
+- if ![runtest_file_p $runtests $src] then { +- continue - } - -- num_bits = width - num_bits; -- -- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); -- operands[0] = gen_lowpart (bvecmode, operands[0]); -- operands[1] = gen_lowpart (bvecmode, operands[1]); -- -- emit_insn (gen_ext (operands[0], zero_reg, operands[1], -- GEN_INT (num_bits / BITS_PER_UNIT))); -- DONE; --}) -- - ;; Helpers for quad-word reduction operations +- # runtest_file_p is already run above, and the code below can run +- # runtest_file_p again, make sure everything for this test is +- # performed if the above runtest_file_p decided this runtest +- # instance should execute the test +- gcc_parallel_test_enable 0 +- c-torture-execute $src $additional_flags +- gcc-dg-runtest $src "" $additional_flags +- gcc_parallel_test_enable 1 +-} ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ ++ "" ${additional_flags} - ; Add (or smin, smax...) the low N/2 elements of the N-element vector -@@ -1267,7 +1202,7 @@ + # All done. ++set dg-do-what-default ${save-dg-do-what-default} + torture-finish + dg-finish +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +@@ -235,7 +235,8 @@ extern ARRAY(expected, hfloat, 64, 2); + + typedef union { + struct { +- int _xxx:25; ++ int _xxx:24; ++ unsigned int FZ:1; + unsigned int DN:1; + unsigned int AHP:1; + unsigned int QC:1; +@@ -258,7 +259,8 @@ typedef union { + unsigned int QC:1; + unsigned int AHP:1; + unsigned int DN:1; +- int _dnm:25; ++ unsigned int FZ:1; ++ int _dnm:24; + } b; + unsigned int word; + } _ARM_FPSCR; +@@ -395,10 +397,15 @@ static void clean_results (void) + #if defined(__aarch64__) + /* On AArch64, make sure to return DefaultNaN to have the same + results as on AArch32. */ +- _ARM_FPSCR _afpscr_for_dn; +- asm volatile ("mrs %0,fpcr" : "=r" (_afpscr_for_dn)); +- _afpscr_for_dn.b.DN = 1; +- asm volatile ("msr fpcr,%0" : : "r" (_afpscr_for_dn)); ++ _ARM_FPSCR _afpscr; ++ asm volatile ("mrs %0,fpcr" : "=r" (_afpscr)); ++ _afpscr.b.DN = 1; ++ ++ /* On AArch64, make sure to flush to zero by default, as on ++ AArch32. */ ++ _afpscr.b.FZ = 1; ++ ++ asm volatile ("msr fpcr,%0" : : "r" (_afpscr)); + #endif + } - (define_insn "quad_halves_v4si" - [(set (match_operand:V2SI 0 "s_register_operand" "=w") -- (vqh_ops:V2SI -+ (VQH_OPS:V2SI - (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1)])) - (vec_select:V2SI (match_dup 1) -@@ -1280,7 +1215,7 @@ +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc +@@ -55,7 +55,22 @@ void FNNAME (INSN_NAME) (void) + /* Apply a binary operator named INSN_NAME. 
*/ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_BINARY_OP, INSN_NAME); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc +@@ -76,7 +76,22 @@ void FNNAME (INSN_NAME) (void) + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_cumulative_sat, ""); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat, ""); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc +@@ -57,7 +57,12 @@ void FNNAME (INSN_NAME) (void) + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc +@@ -60,7 +60,12 @@ void FNNAME (INSN_NAME) (void) + TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ 
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + } - (define_insn "quad_halves_v4sf" - [(set (match_operand:V2SF 0 "s_register_operand" "=w") -- (vqhs_ops:V2SF -+ (VQHS_OPS:V2SF - (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1)])) - (vec_select:V2SF (match_dup 1) -@@ -1293,7 +1228,7 @@ + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc +@@ -60,7 +60,12 @@ void FNNAME (INSN_NAME) (void) + TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + } - (define_insn "quad_halves_v8hi" - [(set (match_operand:V4HI 0 "s_register_operand" "+w") -- (vqh_ops:V4HI -+ (VQH_OPS:V4HI - (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3)])) -@@ -1308,7 +1243,7 @@ + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c +@@ -7,16 +7,10 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9, + 0xfa, 0xfb, 0xfc, 0xfd }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56, + 0x57, 0x58, 0x59, 0x5a }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61, + 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, +@@ -24,8 +18,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61, + VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f, + 0xba0, 0xba1, 0xba2, 0xba3 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff, + 0x0, 0x1, 0x2, 0x3, +@@ -33,16 +25,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc, + 0xfffd, 0xfffe, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; 
+-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VABA/VABAQ" + void exec_vaba (void) +@@ -132,7 +114,18 @@ void exec_vaba (void) + TEST_VABA(q, uint, u, 16, 8); + TEST_VABA(q, uint, u, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + } - (define_insn "quad_halves_v16qi" - [(set (match_operand:V8QI 0 "s_register_operand" "+w") -- (vqh_ops:V8QI -+ (VQH_OPS:V8QI - (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3) -@@ -2200,134 +2135,140 @@ - [(set_attr "type" "neon_sub_halve_narrow_q")] - ) + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c +@@ -3,45 +3,15 @@ + #include "compute-ref-data.h" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff6, 0xfff7, 0xfff8, 0xfff9, + 0xfffa, 0xfffb, 0xfffc, 0xfffd }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x16, 0x17, 0x18, 0x19 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x20, 0x21 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x53, 0x54, 0x55, 0x56, + 0x57, 0x58, 0x59, 0x5a }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x907, 0x908, 0x909, 0x90a }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7, + 0xffffffe8 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for cases 
with input values chosen to test + possible intermediate overflow. */ +@@ -121,7 +91,12 @@ void exec_vabal (void) + TEST_VABAL(uint, u, 16, 32, 4); + TEST_VABAL(uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + /* Use values that could lead to overflow intermediate + * calculations. */ +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c +@@ -8,15 +8,10 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x10, 0xf, 0xe, + 0xd, 0xc, 0xb, 0xa }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x2, 0x1, 0x0 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x17 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, + 0xf3, 0xf4, 0xf5, 0xf6 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe8, 0xffffffe9 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x41c26666, 0x41ba6666 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17, + 0x16, 0x15, 0x14, 0x13, +@@ -25,8 +20,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x3, 0x2, 0x1, + 0x0, 0x1, 0x2, 0x3 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x30, 0x2f, 0x2e, 0x2d }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, +@@ -35,14 +28,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffe4, 0xffe5, 0xffe6, 0xffe7, + 0xffe8, 0xffe9, 0xffea, 0xffeb }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1, + 0xffffffd2, 0xffffffd3 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42407ae1, 0x423c7ae1, + 0x42387ae1, 0x42347ae1 }; + +@@ -130,7 +115,20 @@ void exec_vabd (void) + TEST_VABD(q, uint, u, 32, 4); + TEST_VABD(q, float, f, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ 
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); + + + /* Extra FP tests with special values (-0.0, ....) */ +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c +@@ -3,45 +3,15 @@ + #include "compute-ref-data.h" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x11, 0x10, 0xf, 0xe, + 0xd, 0xc, 0xb, 0xa }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x2, 0x1, 0x0 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x18, 0x17 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, + 0xf3, 0xf4, 0xf5, 0xf6 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe8, + 0xffffffe9 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VABDL" + void exec_vabdl (void) +@@ -99,7 +69,12 @@ void exec_vabdl (void) + TEST_VABDL(uint, u, 16, 32, 4); + TEST_VABDL(uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + } --(define_insn "neon_vceq" -- [(set (match_operand: 0 "s_register_operand" "=w,w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w,w") -- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] -- UNSPEC_VCEQ))] -+;; These may expand to an UNSPEC pattern when a floating point mode is used -+;; without unsafe math optimizations. 
-+(define_expand "neon_vc" -+ [(match_operand: 0 "s_register_operand" "=w,w") -+ (neg: -+ (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w") -+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))] - "TARGET_NEON" -- "@ -- vceq.\t%0, %1, %2 -- vceq.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] -+ { -+ /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations -+ are enabled. */ -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT -+ && !flag_unsafe_math_optimizations) -+ { -+ /* We don't just emit a gen_neon_vc_insn_unspec because -+ we define gen_neon_vceq_insn_unspec only for float modes -+ whereas this expander iterates over the integer modes as well, -+ but we will never expand to UNSPECs for the integer comparisons. */ -+ switch (mode) -+ { -+ case V2SFmode: -+ emit_insn (gen_neon_vcv2sf_insn_unspec (operands[0], -+ operands[1], -+ operands[2])); -+ break; -+ case V4SFmode: -+ emit_insn (gen_neon_vcv4sf_insn_unspec (operands[0], -+ operands[1], -+ operands[2])); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ else -+ emit_insn (gen_neon_vc_insn (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+ } - ) + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c +@@ -12,41 +12,11 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for 
float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c +@@ -18,10 +18,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x5, 0x6, 0x7, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xe, 0xf, 0x10, 0x11 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x19 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, +@@ -40,14 +36,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff4, 0xfff5, 0xfff6, + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x27, 0x28, 0x29, 0x2a }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, + 0xfffffffffffffff4 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c +@@ -6,46 +6,16 @@ + #define TEST_MSG "VADDL" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, + 0xffe7, 0xffe8, 0xffe9, 0xffea }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, + 0xffffffe4, 0xffffffe5 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, + 0xffffffffffffffe1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e3, 0x1e4, 0x1e5, 0x1e6, + 0x1e7, 0x1e8, 0x1e9, 0x1ea }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe2, + 0x1ffe3, 0x1ffe4 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe0, 0x1ffffffe1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXl.inc" +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c +@@ -6,46 +6,16 @@ + #define TEST_MSG "VADDW" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, + 0xffe7, 0xffe8, 0xffe9, 0xffea }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, + 0xffffffe4, 0xffffffe5 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, + 0xffffffffffffffe1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 0xea }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe1, 0xffe2, + 0xffe3, 0xffe4 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe0, 0xffffffe1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXw.inc" +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0x10, 0x10, 0x10, 0x10, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x12, 0x12 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf2, 0xf2, + 0xf4, 0xf4, 0xf6, 0xf6, + 0xf0, 0xf0, 0xf2, 0xf2, +@@ -35,11 +31,3 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x0, 0x1, 0x2, 0x3 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x30, 0x31, 0x32, 0x33 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c ++++ 
b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe1, 0xe2, 0xe3, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe0, 0xffe1, 0xffe0, 0xffe1 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd0, 0xffffffd1 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x0, 0x1, + 0x0, 0x1, 0x0, 0x1, + 0x8, 0x9, 0x8, 0x9, +@@ -36,11 +32,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc0, + 0xffffffc0, 0xffffffc0 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c +@@ -4,48 +4,9 @@ + #include "cmp_fp_op.inc" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 
0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c +@@ -4,47 +4,9 @@ + #include "cmp_fp_op.inc" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c +@@ -4,46 +4,8 @@ + #include "cmp_fp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c +@@ -4,46 +4,8 @@ + #include "cmp_fp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c +@@ -8,29 +8,9 @@ void exec_vceq_p8(void); + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -38,16 +18,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, + 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c +@@ -4,29 +4,9 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -34,16 +14,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xffff, 0xffff }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, + 0xff, 0xff, 0xff, 0xff }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c +@@ -4,29 +4,9 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -34,16 +14,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xffff }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0xff, 0xff, 0xff, 0xff }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c +@@ -4,30 +4,10 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x0 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, +@@ -36,16 +16,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcls.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcls.c +@@ -6,16 +6,6 @@ + VECT_VAR_DECL(expected,int,8,8) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x7, 0x7, 0x7, 0x7, + 0x7, 0x7, 0x7, 0x7, + 0x7, 0x7, 0x7, 0x7, +@@ -23,45 +13,12 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x7, 0x7, 0x7, 0x7, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x14, 0x14, 0x14, 0x14 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 
0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with negative input. */ + VECT_VAR_DECL(expected_with_negative,int,8,8) [] = { 0x7, 0x7, 0x7, 0x7, + 0x7, 0x7, 0x7, 0x7 }; + VECT_VAR_DECL(expected_with_negative,int,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; + VECT_VAR_DECL(expected_with_negative,int,32,2) [] = { 0x1, 0x1 }; +-VECT_VAR_DECL(expected_with_negative,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,uint,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,hfloat,32,2) [] = { 0x33333333, +- 0x33333333 }; + VECT_VAR_DECL(expected_with_negative,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -69,32 +26,6 @@ VECT_VAR_DECL(expected_with_negative,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected_with_negative,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected_with_negative,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +-VECT_VAR_DECL(expected_with_negative,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,uint,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,poly,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,hfloat,32,4) [] = { 0x33333333, +- 0x33333333, +- 0x33333333, +- 0x33333333 }; + + #define INSN_NAME vcls + #define TEST_MSG "VCLS/VCLSQ" +@@ -146,7 +77,13 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + +- CHECK_RESULTS (TEST_MSG, " (positive input)"); ++#define MSG_POSITIVE " (positive 
input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, MSG_POSITIVE); + + /* Fill input vector with arbitrary values (negative). */ + VDUP(vector, , int, s, 8, 8, 0xFF); +@@ -164,7 +101,13 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_with_negative, " (negative input)"); ++#define MSG_NEGATIVE " (negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_with_negative, MSG_NEGATIVE); + } --(define_insn "neon_vcge" -+(define_insn "neon_vc_insn" - [(set (match_operand: 0 "s_register_operand" "=w,w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w,w") -- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] -- UNSPEC_VCGE))] -- "TARGET_NEON" -- "@ -- vcge.\t%0, %1, %2 -- vcge.\t%0, %1, #0" -+ (neg: -+ (COMPARISONS: -+ (match_operand:VDQW 1 "s_register_operand" "w,w") -+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))] -+ "TARGET_NEON && !(GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT -+ && !flag_unsafe_math_optimizations)" -+ { -+ char pattern[100]; -+ sprintf (pattern, "vc.%s%%#\t%%0," -+ " %%1, %s", -+ GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT -+ ? "f" : "", -+ which_alternative == 0 -+ ? "%2" : "#0"); -+ output_asm_insn (pattern, operands); -+ return ""; + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c +@@ -4,30 +4,10 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, +@@ -35,16 +15,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c +@@ -6,36 +6,18 @@ + VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x3, 0x3, 0x3 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x11, 0x11 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x5, 0x5 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x3, 0x3, 0x3 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3, 0x3, 
0x3, 0x3, 0x3, 0x3, 0x3, 0x3, + 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd, 0xd, 0xd, 0xd, + 0xd, 0xd, 0xd, 0xd }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1f, 0x1f, 0x1f, 0x1f }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + + /* Expected results with input=0. */ +@@ -43,16 +25,10 @@ VECT_VAR_DECL(expected_with_0,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8 }; + VECT_VAR_DECL(expected_with_0,int,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,int,32,2) [] = { 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_with_0,uint,8,8) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8 }; + VECT_VAR_DECL(expected_with_0,uint,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,uint,32,2) [] = { 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_0,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_0,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_0,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_with_0,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, +@@ -60,8 +36,6 @@ VECT_VAR_DECL(expected_with_0,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + VECT_VAR_DECL(expected_with_0,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,int,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_with_0,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, +@@ -69,16 +43,6 @@ VECT_VAR_DECL(expected_with_0,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + VECT_VAR_DECL(expected_with_0,uint,16,8) [] = { 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,uint,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define INSN_NAME vclz + #define TEST_MSG "VCLZ/VCLZQ" +@@ -154,7 +118,18 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ 
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + + /* Test with zero as input. */ + VDUP(vector, , int, s, 8, 8, 0); +@@ -184,7 +159,19 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_with_0, " (input=0)"); ++#define MSG_ZERO " (input=0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_with_0, MSG_ZERO); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c +@@ -4,37 +4,14 @@ + + /* Expected results. */ + VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, + 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, + 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 
+- 0x33333333, 0x33333333 }; + + #define INSN_NAME vcnt + #define TEST_MSG "VCNT/VCNTQ" +@@ -86,7 +63,12 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, poly, p, 8, 16); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c +@@ -3,20 +3,6 @@ + #include "compute-ref-data.h" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0x11, 0x11, 0x11, 0x11, +@@ -88,7 +74,17 @@ void exec_vcombine (void) + TEST_VCOMBINE(poly, p, 16, 4, 8); + TEST_VCOMBINE(float, f, 32, 2, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c +@@ -17,34 +17,6 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, + 0x78, 0x56, 0x34, 0x12 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 
0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define INSN_NAME vcreate + #define TEST_MSG "VCREATE" +@@ -113,7 +85,17 @@ FNNAME (INSN_NAME) + TEST_VCREATE(poly, p, 8, 8); + TEST_VCREATE(poly, p, 16, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe4, 0xe5, 0xe6, 0xe7, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffee, 0xffef, 0xffec, 0xffed }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd8, 0xffffffd9 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x6, 0x7, 0x4, 0x5, + 0x2, 0x3, 0x0, 0x1, + 0xe, 0xf, 0xc, 0xd, +@@ -37,11 +33,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc7, 0xffffffc6, + 0xffffffc5, 0xffffffc4 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, + 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c +@@ -17,34 +17,6 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 
0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VGET_HIGH" + void exec_vget_high (void) +@@ -76,7 +48,17 @@ void exec_vget_high (void) + TEST_VGET_HIGH(poly, p, 16, 4, 8); + TEST_VGET_HIGH(float, f, 32, 2, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + } + + int main (void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c +@@ -0,0 +1,125 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected results. */ ++int8_t expected_s8 = 0xf7; ++int16_t expected_s16 = 0xfff3; ++int32_t expected_s32 = 0xfffffff1; ++int64_t expected_s64 = 0xfffffffffffffff0; ++uint8_t expected_u8 = 0xf6; ++uint16_t expected_u16 = 0xfff2; ++uint32_t expected_u32 = 0xfffffff1; ++uint64_t expected_u64 = 0xfffffffffffffff0; ++poly8_t expected_p8 = 0xf6; ++poly16_t expected_p16 = 0xfff2; ++hfloat32_t expected_f32 = 0xc1700000; ++ ++int8_t expectedq_s8 = 0xff; ++int16_t expectedq_s16 = 0xfff5; ++int32_t expectedq_s32 = 0xfffffff3; ++int64_t expectedq_s64 = 0xfffffffffffffff1; ++uint8_t expectedq_u8 = 0xfe; ++uint16_t expectedq_u16 = 0xfff6; ++uint32_t expectedq_u32 = 0xfffffff2; ++uint64_t expectedq_u64 = 0xfffffffffffffff1; ++poly8_t expectedq_p8 = 0xfe; ++poly16_t expectedq_p16 = 0xfff6; ++hfloat32_t expectedq_f32 = 0xc1500000; ++ ++int error_found = 0; ++ ++#define TEST_MSG "VGET_LANE" ++void exec_vget_lane (void) ++{ ++ /* vec=vget_lane(vec, lane), then store the result. 
*/ ++#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ ++ VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ ++ if (VAR(var, T1, W) != expected##Q##_##T2##W) { \ ++ fprintf(stderr, \ ++ "ERROR in %s (%s line %d in result '%s') at type %s " \ ++ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \ ++ TEST_MSG, __FILE__, __LINE__, \ ++ STR(expected##Q##_##T2##W), \ ++ STR(VECT_NAME(T1, W, N)), \ ++ VAR(var, T1, W), \ ++ expected##Q##_##T2##W); \ ++ error_found = 1; \ + } - [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -+ (if_then_else (match_operand 2 "zero_operand") - (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] --) -- --(define_insn "neon_vcgeu" -- [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQIW 1 "s_register_operand" "w") -- (match_operand:VDQIW 2 "s_register_operand" "w")] -- UNSPEC_VCGEU))] -- "TARGET_NEON" -- "vcge.u%#\t%0, %1, %2" -- [(set_attr "type" "neon_compare")] -+ (const_string "neon_compare")))] - ) ++ ++ /* Special variant for floating-point. */ ++ union { ++ uint32_t var_int32; ++ float32_t var_float32; ++ } var_int32_float32; ++ ++#define TEST_VGET_LANE_FP(Q, T1, T2, W, N, L) \ ++ VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ ++ var_int##W##_float##W.var_float##W = VAR(var, T1, W); \ ++ if (var_int##W##_float##W.var_int##W != expected##Q##_##T2##W) { \ ++ fprintf(stderr, \ ++ "ERROR in %s (%s line %d in result '%s') at type %s " \ ++ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \ ++ TEST_MSG, __FILE__, __LINE__, \ ++ STR(expected##Q##_##T2##W), \ ++ STR(VECT_NAME(T1, W, N)), \ ++ var_int##W##_float##W.var_int##W, \ ++ expected##Q##_##T2##W); \ ++ error_found = 1; \ ++ } ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ ++ /* Scalar variables. */ ++ VAR_DECL(var, int, 8); ++ VAR_DECL(var, int, 16); ++ VAR_DECL(var, int, 32); ++ VAR_DECL(var, int, 64); ++ VAR_DECL(var, uint, 8); ++ VAR_DECL(var, uint, 16); ++ VAR_DECL(var, uint, 32); ++ VAR_DECL(var, uint, 64); ++ VAR_DECL(var, poly, 8); ++ VAR_DECL(var, poly, 16); ++ VAR_DECL(var, float, 32); ++ ++ /* Initialize input values. */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); ++ ++ /* Choose lane arbitrarily. 
*/ ++ TEST_VGET_LANE(, int, s, 8, 8, 7); ++ TEST_VGET_LANE(, int, s, 16, 4, 3); ++ TEST_VGET_LANE(, int, s, 32, 2, 1); ++ TEST_VGET_LANE(, int, s, 64, 1, 0); ++ TEST_VGET_LANE(, uint, u, 8, 8, 6); ++ TEST_VGET_LANE(, uint, u, 16, 4, 2); ++ TEST_VGET_LANE(, uint, u, 32, 2, 1); ++ TEST_VGET_LANE(, uint, u, 64, 1, 0); ++ TEST_VGET_LANE(, poly, p, 8, 8, 6); ++ TEST_VGET_LANE(, poly, p, 16, 4, 2); ++ TEST_VGET_LANE_FP(, float, f, 32, 2, 1); ++ ++ TEST_VGET_LANE(q, int, s, 8, 16, 15); ++ TEST_VGET_LANE(q, int, s, 16, 8, 5); ++ TEST_VGET_LANE(q, int, s, 32, 4, 3); ++ TEST_VGET_LANE(q, int, s, 64, 2, 1); ++ TEST_VGET_LANE(q, uint, u, 8, 16, 14); ++ TEST_VGET_LANE(q, uint, u, 16, 8, 6); ++ TEST_VGET_LANE(q, uint, u, 32, 4, 2); ++ TEST_VGET_LANE(q, uint, u, 64, 2, 1); ++ TEST_VGET_LANE(q, poly, p, 8, 16, 14); ++ TEST_VGET_LANE(q, poly, p, 16, 8, 6); ++ TEST_VGET_LANE_FP(q, float, f, 32, 4, 3); ++} ++ ++int main (void) ++{ ++ exec_vget_lane (); ++ ++ if (error_found) ++ abort(); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c +@@ -17,34 +17,6 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VGET_LOW" + void exec_vget_low (void) +@@ -76,7 +48,17 @@ void exec_vget_low (void) + TEST_VGET_LOW(poly, p, 16, 4, 8); + TEST_VGET_LOW(float, f, 32, 2, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + } --(define_insn "neon_vcgt" -+(define_insn "neon_vc_insn_unspec" - [(set 
(match_operand: 0 "s_register_operand" "=w,w") - (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w,w") -- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] -- UNSPEC_VCGT))] -+ [(match_operand:VCVTF 1 "s_register_operand" "w,w") -+ (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")] -+ NEON_VCMP))] - "TARGET_NEON" -- "@ -- vcgt.\t%0, %1, %2 -- vcgt.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] -+ { -+ char pattern[100]; -+ sprintf (pattern, "vc.f%%#\t%%0," -+ " %%1, %s", -+ which_alternative == 0 -+ ? "%2" : "#0"); -+ output_asm_insn (pattern, operands); -+ return ""; + int main (void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x2x2_t ++f_vld2_lane_f32 (float32_t * p, float32x2x2_t v) ++{ ++ float32x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x1x2_t ++f_vld2_lane_f64 (float64_t * p, float64x1x2_t v) ++{ ++ float64x1x2_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++poly8x8x2_t ++f_vld2_lane_p8 (poly8_t * p, poly8x8x2_t v) ++{ ++ poly8x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x4x2_t ++f_vld2_lane_s16 (int16_t * p, int16x4x2_t v) ++{ ++ int16x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ 
++ ++int32x2x2_t ++f_vld2_lane_s32 (int32_t * p, int32x2x2_t v) ++{ ++ int32x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x1x2_t ++f_vld2_lane_s64 (int64_t * p, int64x1x2_t v) ++{ ++ int64x1x2_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int8x8x2_t ++f_vld2_lane_s8 (int8_t * p, int8x8x2_t v) ++{ ++ int8x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x4x2_t ++f_vld2_lane_u16 (uint16_t * p, uint16x4x2_t v) ++{ ++ uint16x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x2x2_t ++f_vld2_lane_u32 (uint32_t * p, uint32x2x2_t v) ++{ ++ uint32x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x1x2_t ++f_vld2_lane_u64 (uint64_t * p, uint64x1x2_t v) ++{ ++ uint64x1x2_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u8_indices_1.c +@@ 
-0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint8x8x2_t ++f_vld2_lane_u8 (uint8_t * p, uint8x8x2_t v) ++{ ++ uint8x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x4x2_t ++f_vld2q_lane_f32 (float32_t * p, float32x4x2_t v) ++{ ++ float32x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x2x2_t ++f_vld2q_lane_f64 (float64_t * p, float64x2x2_t v) ++{ ++ float64x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_p8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++poly8x16x2_t ++f_vld2q_lane_p8 (poly8_t * p, poly8x16x2_t v) ++{ ++ poly8x16x2_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x8x2_t ++f_vld2q_lane_s16 (int16_t * p, int16x8x2_t v) ++{ ++ int16x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x4x2_t ++f_vld2q_lane_s32 (int32_t * p, int32x4x2_t v) ++{ ++ int32x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 
3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x2x2_t ++f_vld2q_lane_s64 (int64_t * p, int64x2x2_t v) ++{ ++ int64x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int8x16x2_t ++f_vld2q_lane_s8 (int8_t * p, int8x16x2_t v) ++{ ++ int8x16x2_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x8x2_t ++f_vld2q_lane_u16 (uint16_t * p, uint16x8x2_t v) ++{ ++ uint16x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x4x2_t ++f_vld2q_lane_u32 (uint32_t * p, uint32x4x2_t v) ++{ ++ uint32x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x2x2_t ++f_vld2q_lane_u64 (uint64_t * p, uint64x2x2_t v) ++{ ++ uint64x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ 
++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint8x16x2_t ++f_vld2q_lane_u8 (uint8_t * p, uint8x16x2_t v) ++{ ++ uint8x16x2_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x2x3_t ++f_vld3_lane_f32 (float32_t * p, float32x2x3_t v) ++{ ++ float32x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x1x3_t ++f_vld3_lane_f64 (float64_t * p, float64x1x3_t v) ++{ ++ float64x1x3_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++poly8x8x3_t ++f_vld3_lane_p8 (poly8_t * p, poly8x8x3_t v) ++{ ++ poly8x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x4x3_t ++f_vld3_lane_s16 (int16_t * p, int16x4x3_t v) ++{ ++ int16x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x2x3_t ++f_vld3_lane_s32 (int32_t * p, int32x2x3_t v) ++{ ++ int32x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ 
b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x1x3_t ++f_vld3_lane_s64 (int64_t * p, int64x1x3_t v) ++{ ++ int64x1x3_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int8x8x3_t ++f_vld3_lane_s8 (int8_t * p, int8x8x3_t v) ++{ ++ int8x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x4x3_t ++f_vld3_lane_u16 (uint16_t * p, uint16x4x3_t v) ++{ ++ uint16x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x2x3_t ++f_vld3_lane_u32 (uint32_t * p, uint32x2x3_t v) ++{ ++ uint32x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x1x3_t ++f_vld3_lane_u64 (uint64_t * p, uint64x1x3_t v) ++{ ++ uint64x1x3_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint8x8x3_t ++f_vld3_lane_u8 (uint8_t * p, uint8x8x3_t v) ++{ ++ uint8x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u8 
(p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x4x3_t ++f_vld3q_lane_f32 (float32_t * p, float32x4x3_t v) ++{ ++ float32x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x2x3_t ++f_vld3q_lane_f64 (float64_t * p, float64x2x3_t v) ++{ ++ float64x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_p8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++poly8x16x3_t ++f_vld3q_lane_p8 (poly8_t * p, poly8x16x3_t v) ++{ ++ poly8x16x3_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x8x3_t ++f_vld3q_lane_s16 (int16_t * p, int16x8x3_t v) ++{ ++ int16x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x4x3_t ++f_vld3q_lane_s32 (int32_t * p, int32x4x3_t v) ++{ ++ int32x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { 
xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x2x3_t ++f_vld3q_lane_s64 (int64_t * p, int64x2x3_t v) ++{ ++ int64x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int8x16x3_t ++f_vld3q_lane_s8 (int8_t * p, int8x16x3_t v) ++{ ++ int8x16x3_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x8x3_t ++f_vld3q_lane_u16 (uint16_t * p, uint16x8x3_t v) ++{ ++ uint16x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x4x3_t ++f_vld3q_lane_u32 (uint32_t * p, uint32x4x3_t v) ++{ ++ uint32x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x2x3_t ++f_vld3q_lane_u64 (uint64_t * p, uint64x2x3_t v) ++{ ++ uint64x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint8x16x3_t ++f_vld3q_lane_u8 (uint8_t * p, uint8x16x3_t v) ++{ ++ uint8x16x3_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u8 (p, v, 
-1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x2x4_t ++f_vld4_lane_f32 (float32_t * p, float32x2x4_t v) ++{ ++ float32x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x1x4_t ++f_vld4_lane_f64 (float64_t * p, float64x1x4_t v) ++{ ++ float64x1x4_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++poly8x8x4_t ++f_vld4_lane_p8 (poly8_t * p, poly8x8x4_t v) ++{ ++ poly8x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x4x4_t ++f_vld4_lane_s16 (int16_t * p, int16x4x4_t v) ++{ ++ int16x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x2x4_t ++f_vld4_lane_s32 (int32_t * p, int32x2x4_t v) ++{ ++ int32x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x1x4_t ++f_vld4_lane_s64 (int64_t * p, int64x1x4_t v) ++{ ++ int64x1x4_t res; ++ /* { dg-error "lane 1 out of 
range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int8x8x4_t ++f_vld4_lane_s8 (int8_t * p, int8x8x4_t v) ++{ ++ int8x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x4x4_t ++f_vld4_lane_u16 (uint16_t * p, uint16x4x4_t v) ++{ ++ uint16x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x2x4_t ++f_vld4_lane_u32 (uint32_t * p, uint32x2x4_t v) ++{ ++ uint32x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x1x4_t ++f_vld4_lane_u64 (uint64_t * p, uint64x1x4_t v) ++{ ++ uint64x1x4_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint8x8x4_t ++f_vld4_lane_u8 (uint8_t * p, uint8x8x4_t v) ++{ ++ uint8x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* 
} } */ ++ ++float32x4x4_t ++f_vld4q_lane_f32 (float32_t * p, float32x4x4_t v) ++{ ++ float32x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f32 (p, v, -1); ++ return res; +} -+ [(set_attr "type" "neon_fp_compare_s")] - ) - --(define_insn "neon_vcgtu" -+(define_insn "neon_vcu" - [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQIW 1 "s_register_operand" "w") -- (match_operand:VDQIW 2 "s_register_operand" "w")] -- UNSPEC_VCGTU))] -+ (neg: -+ (GTUGEU: -+ (match_operand:VDQIW 1 "s_register_operand" "w") -+ (match_operand:VDQIW 2 "s_register_operand" "w"))))] - "TARGET_NEON" -- "vcgt.u%#\t%0, %1, %2" -+ "vc.u%#\t%0, %1, %2" - [(set_attr "type" "neon_compare")] - ) - --;; VCLE and VCLT only support comparisons with immediate zero (register --;; variants are VCGE and VCGT with operands reversed). -- --(define_insn "neon_vcle" -- [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "zero_operand" "Dz")] -- UNSPEC_VCLE))] -- "TARGET_NEON" -- "vcle.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] --) -- --(define_insn "neon_vclt" -- [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "zero_operand" "Dz")] -- UNSPEC_VCLT))] -+(define_expand "neon_vca" -+ [(set (match_operand: 0 "s_register_operand") -+ (neg: -+ (GTGE: -+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand")) -+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))] - "TARGET_NEON" -- "vclt.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] -+ { -+ if (flag_unsafe_math_optimizations) -+ emit_insn (gen_neon_vca_insn (operands[0], operands[1], -+ operands[2])); -+ else -+ emit_insn (gen_neon_vca_insn_unspec (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+ } - ) - --(define_insn "neon_vcage" -+(define_insn "neon_vca_insn" - [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") -- (match_operand:VCVTF 2 "s_register_operand" "w")] -- UNSPEC_VCAGE))] -- "TARGET_NEON" -- "vacge.\t%0, %1, %2" -+ (neg: -+ (GTGE: -+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")) -+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))] -+ "TARGET_NEON && flag_unsafe_math_optimizations" -+ "vac.\t%0, %1, %2" - [(set_attr "type" "neon_fp_compare_s")] - ) - --(define_insn "neon_vcagt" -+(define_insn "neon_vca_insn_unspec" - [(set (match_operand: 0 "s_register_operand" "=w") - (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") - (match_operand:VCVTF 2 "s_register_operand" "w")] -- UNSPEC_VCAGT))] -+ NEON_VACMP))] - "TARGET_NEON" -- "vacgt.\t%0, %1, %2" -+ "vac.\t%0, %1, %2" - [(set_attr "type" "neon_fp_compare_s")] - ) - ---- a/src/gcc/config/arm/thumb2.md -+++ b/src/gcc/config/arm/thumb2.md -@@ -300,7 +300,7 @@ - ldr%?\\t%0, %1 - str%?\\t%1, %0 - str%?\\t%1, %0" -- [(set_attr "type" 
"mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") -+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load1,load1,store1,store1") - (set_attr "length" "2,4,2,4,4,4,4,4,4") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") -@@ -486,12 +486,12 @@ - ) - - (define_insn_and_split "*thumb2_movsicc_insn" -- [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r") -+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r") - (if_then_else:SI - (match_operator 3 "arm_comparison_operator" - [(match_operand 4 "cc_register" "") (const_int 0)]) -- (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r") -- (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))] -+ (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,I ,r,rI,K ,K,r") -+ (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,I,K ,rI,K,r")))] - "TARGET_THUMB2" - "@ - it\\t%D3\;mov%D3\\t%0, %2 -@@ -504,12 +504,14 @@ - # - # - # -+ # - #" - ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 -- ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 -- ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 -- ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 -- ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 -+ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 -+ ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 -+ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 -+ ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 -+ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 - "&& reload_completed" - [(const_int 0)] - { -@@ -540,10 +542,30 @@ - operands[2]))); - DONE; - } -- [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") -- (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") -+ [(set_attr "length" "4,4,6,6,6,6,10,8,10,10,10,6") -+ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,no,yes") - (set_attr "conds" "use") -- (set_attr "type" "multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "mvn_imm") -+ (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "mvn_imm") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*thumb2_movsfcc_soft_insn" -@@ -1182,7 +1204,11 @@ - " - [(set_attr "predicable" "yes") - (set_attr "length" "2") -- (set_attr "type" "alu_sreg")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "alu_imm") -+ (const_string "alu_sreg")) -+ (const_string "alu_imm")])] - ) - - (define_insn "*thumb2_subsi_short" -@@ -1247,14 +1273,21 @@ - " - [(set_attr "conds" "set") - (set_attr "length" "2,2,4") -- (set_attr "type" "alu_sreg")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "alus_imm") -+ (const_string "alus_sreg")) -+ (const_string "alus_imm") -+ (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "alus_imm") -+ 
(const_string "alus_sreg"))])] - ) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x2x4_t ++f_vld4q_lane_f64 (float64_t * p, float64x2x4_t v) ++{ ++ float64x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_p8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++poly8x16x4_t ++f_vld4q_lane_p8 (poly8_t * p, poly8x16x4_t v) ++{ ++ poly8x16x4_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x8x4_t ++f_vld4q_lane_s16 (int16_t * p, int16x8x4_t v) ++{ ++ int16x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x4x4_t ++f_vld4q_lane_s32 (int32_t * p, int32x4x4_t v) ++{ ++ int32x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x2x4_t ++f_vld4q_lane_s64 (int64_t * p, int64x2x4_t v) ++{ ++ int64x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ 
++int8x16x4_t
++f_vld4q_lane_s8 (int8_t * p, int8x16x4_t v)
++{
++ int8x16x4_t res;
++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_s8 (p, v, 16);
++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_s8 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u16_indices_1.c
+@@ -0,0 +1,16 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++
++uint16x8x4_t
++f_vld4q_lane_u16 (uint16_t * p, uint16x8x4_t v)
++{
++ uint16x8x4_t res;
++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u16 (p, v, 8);
++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u16 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u32_indices_1.c
+@@ -0,0 +1,16 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++
++uint32x4x4_t
++f_vld4q_lane_u32 (uint32_t * p, uint32x4x4_t v)
++{
++ uint32x4x4_t res;
++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u32 (p, v, 4);
++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u32 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u64_indices_1.c
+@@ -0,0 +1,17 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++/* { dg-skip-if "" { arm*-*-* } } */
++
++uint64x2x4_t
++f_vld4q_lane_u64 (uint64_t * p, uint64x2x4_t v)
++{
++ uint64x2x4_t res;
++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u64 (p, v, 2);
++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u64 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u8_indices_1.c
+@@ -0,0 +1,17 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++/* { dg-skip-if "" { arm*-*-* } } */
++
++uint8x16x4_t
++f_vld4q_lane_u8 (uint8_t * p, uint8x16x4_t v)
++{
++ uint8x16x4_t res;
++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u8 (p, v, 16);
++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u8 (p, v, -1);
++ return res;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
+@@ -27,8 +27,6 @@ VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+ 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+ VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+-VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333,
+- 0x3333333333333333 };
+ VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb,
+@@ -37,8 +35,6 @@ VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -71,8 +67,6 @@ VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -81,8 +75,6 @@ VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -115,8 +107,6 @@ VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -125,8 +115,6 @@ VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -159,8 +147,6 @@ VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -169,8 +155,6 @@ VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -206,8 +190,6 @@ VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -216,8 +198,6 @@ 
VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -252,8 +232,6 @@ VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -262,8 +240,6 @@ VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -296,8 +272,6 @@ VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -306,8 +280,6 @@ VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -340,8 +312,6 @@ VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -350,8 +320,6 @@ VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -384,8 +352,6 @@ VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; + VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xfffffffc, 0xfffffffd, + 0xfffffffe, 0xffffffff }; +-VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, +@@ -394,8 +360,6 @@ 
VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; + VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xfffffffc, 0xfffffffd, + 0xfffffffe, 0xffffffff }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, +@@ -500,6 +464,32 @@ void exec_vldX (void) + TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + ++ /* vldX supports all vector types except [u]int64x2. */ ++#define CHECK_RESULTS_VLDX(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ \ ++ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ ++ } \ ++ + DECL_ALL_VLDX(2); + DECL_ALL_VLDX(3); + DECL_ALL_VLDX(4); +@@ -650,39 +640,39 @@ void exec_vldX (void) + clean_results (); + #define TEST_MSG "VLD2/VLD2Q" + TEST_ALL_VLDX(2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, "chunk 0"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, "chunk 1"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_1, "chunk 1"); + + /* Check vld3/vld3q. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD3/VLD3Q" + TEST_ALL_VLDX(3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, "chunk 0"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, "chunk 1"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(3, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, "chunk 2"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_2, "chunk 2"); + + /* Check vld4/vld4q. 
*/ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD4/VLD4Q" + TEST_ALL_VLDX(4); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, "chunk 0"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, "chunk 1"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(4, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, "chunk 2"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_2, "chunk 2"); + + TEST_ALL_EXTRA_CHUNKS(4, 3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, "chunk 3"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_3, "chunk 3"); + } - (define_insn "*thumb2_addsi3_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV -- (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r") -- (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r")) -+ (plus:SI (match_operand:SI 0 "s_register_operand" "l, r") -+ (match_operand:SI 1 "arm_add_operand" "lPv,rIL")) - (const_int 0)))] - "TARGET_THUMB2" - "* -@@ -1271,8 +1304,10 @@ - return \"cmn\\t%0, %1\"; - " - [(set_attr "conds" "set") -- (set_attr "length" "2,2,4,4") -- (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_sreg")] -+ (set_attr "length" "2,4") -+ (set (attr "type") (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "alus_imm") -+ (const_string "alus_sreg")))] - ) + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c +@@ -19,34 +19,6 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; + VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld2_dup/chunk 1. 
*/ + VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, +@@ -64,34 +36,6 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1, + 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld3_dup/chunk 0. */ + VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, +@@ -111,34 +55,6 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, + VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff0 }; + VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld3_dup/chunk 1. 
*/ + VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, +@@ -158,34 +74,6 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, + VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2, + 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 }; +-VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld3_dup/chunk 2. */ + VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, +@@ -205,34 +93,6 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, + VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0, + 0xfff1, 0xfff2 }; + VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 }; +-VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk 0. 
*/ + VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -250,34 +110,6 @@ VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk 1. */ + VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -294,34 +126,6 @@ VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk 2. 
*/ + VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -338,34 +142,6 @@ VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk3. */ + VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -382,33 +158,6 @@ VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + void exec_vldX_dup (void) + { +@@ -478,6 +227,21 @@ void exec_vldX_dup (void) + TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y) + ++ 
/* vldX_dup supports only 64-bit inputs. */ ++#define CHECK_RESULTS_VLDX_DUP(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ } \ + + DECL_ALL_VLDX_DUP(2); + DECL_ALL_VLDX_DUP(3); +@@ -629,39 +393,39 @@ void exec_vldX_dup (void) + clean_results (); + #define TEST_MSG "VLD2_DUP/VLD2Q_DUP" + TEST_ALL_VLDX_DUP(2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, "chunk 0"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld2_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, "chunk 1"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld2_1, "chunk 1"); + + /* Check vld3_dup/vld3q_dup. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD3_DUP/VLD3Q_DUP" + TEST_ALL_VLDX_DUP(3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, "chunk 0"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld3_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, "chunk 1"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld3_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(3, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, "chunk 2"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld3_2, "chunk 2"); + + /* Check vld4_dup/vld4q_dup */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD4_DUP/VLD4Q_DUP" + TEST_ALL_VLDX_DUP(4); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, "chunk 0"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, "chunk 1"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(4, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, "chunk 2"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_2, "chunk 2"); + + TEST_ALL_EXTRA_CHUNKS(4, 3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, "chunk 3"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_3, "chunk 3"); + } - (define_insn "*thumb2_mulsi_short" ---- a/src/gcc/config/arm/unknown-elf.h -+++ b/src/gcc/config/arm/unknown-elf.h -@@ -32,7 +32,9 @@ - #define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c +@@ -9,42 +9,24 @@ VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa }; + 
VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -55,40 +37,22 @@ VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xf0, 0xf1 }; + VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 
0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, +@@ -99,40 +63,22 @@ VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -143,40 +89,22 @@ VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xf0, 0xf1, 0xf2, 0xaa }; + VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xf0, 0xf1, 0xf2, 0xaa }; + VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 
0xc1600000, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; + VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; + VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -187,40 +115,22 @@ VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xf0, 0xf1, 0xf2 }; + VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; + VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; + VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, + 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff2, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa, 
+@@ -231,40 +141,22 @@ VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -275,40 +167,22 @@ VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa 
}; +-VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -319,40 +193,22 @@ VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, +@@ -363,40 +219,22 @@ VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = 
{ 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -542,6 +380,26 @@ void exec_vldX_lane (void) + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + ++ /* vldX_lane supports only a subset of all variants. */ ++#define CHECK_RESULTS_VLDX_LANE(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ ++ } \ ++ + /* Declare the temporary buffers / variables. 
*/ + DECL_ALL_VLDX_LANE(2); + DECL_ALL_VLDX_LANE(3); +@@ -568,39 +426,39 @@ void exec_vldX_lane (void) + clean_results (); + #define TEST_MSG "VLD2_LANE/VLD2Q_LANE" + TEST_ALL_VLDX_LANE(2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, " chunk 0"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld2_0, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, " chunk 1"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld2_1, " chunk 1"); + + /* Check vld3_lane/vld3q_lane. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD3_LANE/VLD3Q_LANE" + TEST_ALL_VLDX_LANE(3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, " chunk 0"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_0, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, " chunk 1"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_1, " chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(3, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, " chunk 2"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_2, " chunk 2"); + + /* Check vld4_lane/vld4q_lane. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD4_LANE/VLD4Q_LANE" + TEST_ALL_VLDX_LANE(4); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, " chunk 0"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_0, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, " chunk 1"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_1, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, " chunk 2"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_2, " chunk 2"); + + TEST_ALL_EXTRA_CHUNKS(4, 3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, " chunk 3"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_3, " chunk 3"); + } - #undef STARTFILE_SPEC --#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC -+#define STARTFILE_SPEC \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ -+ UNKNOWN_ELF_STARTFILE_SPEC + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c +@@ -7,15 +7,12 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x1, 0x12, 0x23, + 0x34, 0x45, 0x56, 0x67 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0xfde0, 0xfe02, 0xfe24, 0xfe46 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffcd0, 0xfffffd03 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0x4, 0x48, 0x8c, + 0xd0, 0x14, 0x58, 0x9c }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc, + 0xd0, 0x94, 0x58, 0x1c }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5, + 0x6c, 0xe3, 0x5a, 0xd1, +@@ -25,8 +22,6 @@ VECT_VAR_DECL(expected,int,16,8) [] = { 0xf780, 0xf808, 0xf890, 0xf918, + 0xf9a0, 0xfa28, 0xfab0, 0xfb38 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff670, 0xfffff709, + 0xfffff7a2, 0xfffff83b }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0xa, 0xb4, 0x5e, 
+ 0x8, 0xb2, 0x5c, 0x6, + 0xb0, 0x5a, 0x4, 0xae, +@@ -35,14 +30,10 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf450, 0xf50b, 0xf5c6, 0xf681, + 0xf73c, 0xf7f7, 0xf8b2, 0xf96d }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff340, 0xfffff40c, + 0xfffff4d8, 0xfffff5a4 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e, + 0xc8, 0x62, 0x9c, 0x36, + 0x30, 0x9a, 0x64, 0xce, + 0x98, 0x32, 0xcc, 0x66 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000, + 0xc4ae4ccd, 0xc4a1d999 }; + +@@ -145,7 +136,22 @@ void FNNAME (INSN_NAME) (void) + TEST_VMUL(INSN_NAME, q, poly, p, 8, 16); + TEST_VMUL(INSN_NAME, q, float, f, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); + } - #define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s" + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c +@@ -12,41 +12,11 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; 
+-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfb, 0xfb, 0xfb, 0xfb, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff3, 0xfff3 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff7, 0xfffffff7 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffd }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xf9, 0xfb, 0xfb, + 0xfd, 0xfd, 0xff, 0xff, + 0xf9, 0xf9, 0xfb, 0xfb, +@@ -38,11 +34,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffc, + 0xfffffffffffffffd }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0xf6, 0xf7, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xffff, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff8, 0xfffffff9 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf6, 0xf7, 0xf6, 0xf7, + 0xf6, 0xf7, 0xf6, 0xf7, + 0xfe, 0xff, 0xfe, 0xff, +@@ -38,11 +34,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff7, 0xfffffff7, + 0xfffffff7, 0xfffffff7 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, + 0xfffffffffffffff3 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c +@@ -11,16 +11,6 @@ void vqabs_extra(void); + VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 
0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, +@@ -28,25 +18,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected values of cumulative_saturation flag. 
*/ + int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c +@@ -39,10 +39,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x1, 0x2, 0x3, 0x4, + 0x5, 0x6, 0x7, 0x8, + 0x9, 0xa, 0xb, 0xc, +@@ -61,14 +57,6 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, + 0xffffffffffffffff }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; -@@ -80,7 +82,9 @@ - \ - ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ - ASM_OUTPUT_LABEL (FILE, NAME); \ -- fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \ -+ fprintf (FILE, "\t.space\t%d\n", SIZE ? (int) SIZE : 1); \ -+ fprintf (FILE, "\t.size\t%s, %d\n", \ -+ NAME, SIZE ? (int) SIZE : 1); \ - } \ - while (0) ---- a/src/gcc/config/glibc-stdint.h -+++ b/src/gcc/config/glibc-stdint.h -@@ -22,6 +22,12 @@ a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ + /* 64-bits types, with 0 as second input. */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovn.c +@@ -0,0 +1,134 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12, ++ 0x12, 0x12, 0x12, 0x12 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0x12345678, 0x12345678 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 }; ++ ++/* Expected values of cumulative_saturation flag when saturation occurs. 
*/ ++int VECT_VAR(expected_cumulative_sat1,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat1,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat1,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat1,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat1,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat1,uint,32,2) = 1; ++ ++/* Expected results when saturation occurs. */ ++VECT_VAR_DECL(expected1,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected1,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected1,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++#define INSN_NAME vqmovn ++#define TEST_MSG "VQMOVN" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN_NAME) ++{ ++ /* Basic test: y=OP(x), then store the result. */ ++#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ ++ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* No need for 64 bits variants. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); ++ ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ /* Fill input vector with arbitrary values. */ ++ VDUP(vector, q, int, s, 16, 8, 0x12); ++ VDUP(vector, q, int, s, 32, 4, 0x1278); ++ VDUP(vector, q, int, s, 64, 2, 0x12345678); ++ VDUP(vector, q, uint, u, 16, 8, 0x82); ++ VDUP(vector, q, uint, u, 32, 4, 0x8765); ++ VDUP(vector, q, uint, u, 64, 2, 0x87654321); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#define CMT "" ++ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ ++ ++ /* Fill input vector with arbitrary values which cause cumulative ++ saturation. 
*/ ++ VDUP(vector, q, int, s, 16, 8, 0x1234); ++ VDUP(vector, q, int, s, 32, 4, 0x12345678); ++ VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL); ++ VDUP(vector, q, uint, u, 16, 8, 0x8234); ++ VDUP(vector, q, uint, u, 32, 4, 0x87654321); ++ VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#undef CMT ++#define CMT " (with saturation)" ++ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat1, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected1, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqmovn (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovun.c +@@ -0,0 +1,93 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34, ++ 0x34, 0x34, 0x34, 0x34 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 }; ++ ++/* Expected values of cumulative_saturation flag with negative input. */ ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 1; ++ ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ ++#define INSN_NAME vqmovun ++#define TEST_MSG "VQMOVUN" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN_NAME) ++{ ++ /* Basic test: y=OP(x), then store the result. 
*/ ++#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ ++ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ /* Fill input vector with arbitrary values. */ ++ VDUP(vector, q, int, s, 16, 8, 0x34); ++ VDUP(vector, q, int, s, 32, 4, 0x5678); ++ VDUP(vector, q, int, s, 64, 2, 0x12345678); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#define CMT "" ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ ++ /* Fill input vector with negative values. */ ++ VDUP(vector, q, int, s, 16, 8, 0x8234); ++ VDUP(vector, q, int, s, 32, 4, 0x87654321); ++ VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#undef CMT ++#define CMT " (negative input)" ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqmovun (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c +@@ -11,16 +11,6 @@ void vqneg_extra(void); + VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, +@@ -28,25 +18,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + 
VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; -+/* Systems using musl libc should use this header and make sure -+ OPTION_MUSL is defined correctly before using the TYPE macros. */ -+#ifndef OPTION_MUSL -+#define OPTION_MUSL 0 -+#endif + /* Expected values of cumulative_saturation flag. */ + int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh.c +@@ -0,0 +1,161 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag when multiplication ++ saturates. */ ++int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++ ++/* Expected results when multiplication saturates. */ ++VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++/* Expected values of cumulative_saturation flag when rounding ++ should not cause saturation. */ ++int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++ ++/* Expected results when rounding should not cause saturation. 
*/ ++VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++#define INSN vqrdmulh ++#define TEST_MSG "VQRDMULH" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* vector_res = vqrdmulh(vector,vector2), then store the result. */ ++#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE(vector, int, 16, 4); ++ DECL_VARIABLE(vector, int, 32, 2); ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, int, 16, 8); ++ DECL_VARIABLE(vector_res, int, 32, 4); ++ ++ DECL_VARIABLE(vector2, int, 16, 4); ++ DECL_VARIABLE(vector2, int, 32, 2); ++ DECL_VARIABLE(vector2, int, 16, 8); ++ DECL_VARIABLE(vector2, int, 32, 4); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, , int, s, 16, 4); ++ VLOAD(vector, buffer, , int, s, 32, 2); ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ ++ /* Initialize vector2. */ ++ VDUP(vector2, , int, s, 16, 4, 0x5555); ++ VDUP(vector2, , int, s, 32, 2, 0xBB); ++ VDUP(vector2, q, int, s, 16, 8, 0x33); ++ VDUP(vector2, q, int, s, 32, 4, 0x22); ++ ++#define CMT "" ++ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ ++ /* Now use input values such that the multiplication causes ++ saturation. 
*/ ++#define TEST_MSG_MUL " (check mul cumulative saturation)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8000); ++ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ VDUP(vector2, q, int, s, 16, 8, 0x8000); ++ VDUP(vector2, q, int, s, 32, 4, 0x80000000); ++ ++ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ ++ /* Use input values where rounding produces a result equal to the ++ saturation value, but does not set the saturation flag. */ ++#define TEST_MSG_ROUND " (check rounding)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8001); ++ VDUP(vector2, , int, s, 32, 2, 0x80000001); ++ VDUP(vector2, q, int, s, 16, 8, 0x8001); ++ VDUP(vector2, q, int, s, 32, 4, 0x80000001); ++ ++ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++} ++ ++int main (void) ++{ ++ exec_vqrdmulh (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_lane.c +@@ -0,0 +1,169 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag when multiplication ++ saturates. */ ++int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++ ++/* Expected results when multiplication saturates. 
*/ ++VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++/* Expected values of cumulative_saturation flag when rounding ++ should not cause saturation. */ ++int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++ ++/* Expected results when rounding should not cause saturation. */ ++VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++#define INSN vqrdmulh ++#define TEST_MSG "VQRDMULH_LANE" ++ ++#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ ++#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N2), \ ++ L); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE(vector, int, 16, 4); ++ DECL_VARIABLE(vector, int, 32, 2); ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, int, 16, 8); ++ DECL_VARIABLE(vector_res, int, 32, 4); ++ ++ /* vector2: vqrdmulh_lane and vqrdmulhq_lane have a 2nd argument with ++ the same number of elements, so we need only one variable of each ++ type. */ ++ DECL_VARIABLE(vector2, int, 16, 4); ++ DECL_VARIABLE(vector2, int, 32, 2); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, , int, s, 16, 4); ++ VLOAD(vector, buffer, , int, s, 32, 2); ++ ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ ++ /* Initialize vector2. */ ++ VDUP(vector2, , int, s, 16, 4, 0x55); ++ VDUP(vector2, , int, s, 32, 2, 0xBB); ++ ++ /* Choose lane arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ ++ /* Now use input values such that the multiplication causes ++ saturation. */ ++#define TEST_MSG_MUL " (check mul cumulative saturation)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8000); ++ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ ++ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8001); ++ VDUP(vector2, , int, s, 32, 2, 0x80000001); ++ ++ /* Use input values where rounding produces a result equal to the ++ saturation value, but does not set the saturation flag. */ ++#define TEST_MSG_ROUND " (check rounding)" ++ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++} ++ ++int main (void) ++{ ++ exec_vqrdmulh_lane (); ++ return 0; ++} ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_n.c +@@ -0,0 +1,155 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5, ++ 0x5, 0x4, 0x4, 0x4 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, ++ 0xfffffffe, 0xfffffffe }; ++ ++/* Expected values of cumulative_saturation flag when multiplication ++ saturates. */ ++int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++ ++/* Expected results when multiplication saturates. */ ++VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++/* Expected values of cumulative_saturation flag when rounding ++ should not cause saturation. */ ++int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++ ++/* Expected results when rounding should not cause saturation. */ ++VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++#define INSN vqrdmulh ++#define TEST_MSG "VQRDMULH_N" ++ ++#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ int i; ++ ++ /* vector_res = vqrdmulh_n(vector,val), then store the result. */ ++#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ L); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE(vector, int, 16, 4); ++ DECL_VARIABLE(vector, int, 32, 2); ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, int, 16, 8); ++ DECL_VARIABLE(vector_res, int, 32, 4); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, , int, s, 16, 4); ++ VLOAD(vector, buffer, , int, s, 32, 2); ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ ++ /* Choose multiplier arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ ++ /* Now use input values such that the multiplication causes ++ saturation. */ ++#define TEST_MSG_MUL " (check mul cumulative saturation)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ ++ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ ++ /* Use input values where rounding produces a result equal to the ++ saturation value, but does not set the saturation flag. */ ++#define TEST_MSG_ROUND " (check rounding)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ ++ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++} ++ ++int main (void) ++{ ++ exec_vqrdmulh_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshl.c +@@ -0,0 +1,1090 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag with input=0. 
*/ ++int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++ ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with input=0 and ++ negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++ ++/* Expected results with input=0 and negative shift amount. 
*/ ++VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, ++ 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_saturation flag with negative shift ++ amount. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++ ++/* Expected results with negative shift amount. 
*/ ++VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, ++ 0xfd, 0xfd, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; ++VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, ++ 0x3d, 0x3d, 0x3e, 0x3e }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; ++VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; ++VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2 }; ++VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x80000, 0x80000, ++ 0x80000, 0x80000 }; ++VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; ++ ++/* Expected values of cumulative_saturation flag with input=max and ++ shift by -1. */ ++int VECT_VAR(expected_cumulative_sat_minus1,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,64,2) = 0; ++ ++/* Expected results with input=max and shift by -1. 
*/ ++VECT_VAR_DECL(expected_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_minus1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_minus1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_minus1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_minus1,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_minus1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_minus1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, ++ 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_minus1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_minus1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_minus1,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, ++ 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_minus1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_minus1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++ ++/* Expected values of cumulative_saturation flag with input=max and ++ shift by -3. */ ++int VECT_VAR(expected_cumulative_sat_minus3,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,64,2) = 0; ++ ++/* Expected results with input=max and shift by -3. 
*/ ++VECT_VAR_DECL(expected_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_minus3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_minus3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_minus3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_minus3,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_minus3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_minus3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, ++ 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_minus3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_minus3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_minus3,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, ++ 0x2000, 0x2000, 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_minus3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_minus3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected values of cumulative_saturation flag with input=max and ++ large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_large_sh,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,2) = 1; ++ ++/* Expected results with input=max and large shift amount. 
*/ ++VECT_VAR_DECL(expected_large_sh,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_large_sh,int,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_large_sh,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_large_sh,int,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_large_sh,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_large_sh,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_large_sh,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_large_sh,int,16,8) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_large_sh,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_large_sh,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_large_sh,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_large_sh,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_saturation flag with negative input and ++ large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,2) = 1; ++ ++/* Expected results with negative input and large shift amount. 
*/ ++VECT_VAR_DECL(expected_neg_large_sh,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,32,2) [] = { 0xffffffff, ++ 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg_large_sh,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,32,4) [] = { 0xffffffff, ++ 0xffffffff, ++ 0xffffffff, ++ 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_saturation flag with max/min input and ++ large negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,2) = 0; ++ ++/* Expected results with max/min input and large negative shift amount. 
*/ ++VECT_VAR_DECL(expected_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with input=0 and ++ large negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,2) = 0; ++ ++/* Expected results with input=0 and large negative shift amount. 
*/ ++VECT_VAR_DECL(expected_0_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++ ++#define INSN vqrshl ++#define TEST_MSG "VQRSHL/VQRSHLQ" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: v3=vqrshl(v1,v2), then store the result. */ ++#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector_shift, T3, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); ++ ++ clean_results (); ++ ++ /* Fill input vector with 0, to check saturation on limits. */ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); ++ ++ /* Choose init value arbitrarily, will be used as shift amount */ ++ /* Use values equal to or one-less-than the type width to check ++ behaviour on limits. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 7); ++ VDUP(vector_shift, , int, s, 16, 4, 15); ++ VDUP(vector_shift, , int, s, 32, 2, 31); ++ VDUP(vector_shift, , int, s, 64, 1, 63); ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ ++#define CMT " (with input = 0)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++#undef CMT ++#define CMT " (input 0 and negative shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); ++ ++ ++ /* Test again, with predefined input values. */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose init value arbitrarily, will be used as shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 1); ++ VDUP(vector_shift, , int, s, 16, 4, 3); ++ VDUP(vector_shift, , int, s, 32, 2, 8); ++ VDUP(vector_shift, , int, s, 64, 1, 3); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 12); ++ VDUP(vector_shift, q, int, s, 32, 4, 31); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ ++#undef CMT ++#define CMT "" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -2); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++#undef CMT ++#define CMT " (negative shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ /* Use -1 shift amount to check cumulative saturation with ++ round_const. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -1); ++ VDUP(vector_shift, , int, s, 32, 2, -1); ++ VDUP(vector_shift, , int, s, 64, 1, -1); ++ VDUP(vector_shift, q, int, s, 8, 16, -1); ++ VDUP(vector_shift, q, int, s, 16, 8, -1); ++ VDUP(vector_shift, q, int, s, 32, 4, -1); ++ VDUP(vector_shift, q, int, s, 64, 2, -1); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: shift by -1)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus1, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus1, CMT); ++ ++ ++ /* Use -3 shift amount to check cumulative saturation with ++ round_const. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -3); ++ VDUP(vector_shift, , int, s, 16, 4, -3); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, -3); ++ VDUP(vector_shift, q, int, s, 16, 8, -3); ++ VDUP(vector_shift, q, int, s, 32, 4, -3); ++ VDUP(vector_shift, q, int, s, 64, 2, -3); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: shift by -3)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus3, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus3, CMT); ++ ++ ++ /* Use large shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 10); ++ VDUP(vector_shift, , int, s, 16, 4, 20); ++ VDUP(vector_shift, , int, s, 32, 2, 40); ++ VDUP(vector_shift, , int, s, 64, 1, 70); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 20); ++ VDUP(vector_shift, q, int, s, 32, 4, 40); ++ VDUP(vector_shift, q, int, s, 64, 2, 70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_sh, CMT); ++ ++ ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, , int, s, 8, 8, 0x80); ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); ++ VDUP(vector, q, int, s, 8, 16, 0x80); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ ++ /* Use large shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 10); ++ VDUP(vector_shift, , int, s, 16, 4, 20); ++ VDUP(vector_shift, , int, s, 32, 2, 40); ++ VDUP(vector_shift, , int, s, 64, 1, 70); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 20); ++ VDUP(vector_shift, q, int, s, 32, 4, 40); ++ VDUP(vector_shift, q, int, s, 64, 2, 70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large shift amount with negative input)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large_sh, CMT); ++ ++ ++ /* Fill input vector with negative and positive values, to check ++ * saturation on limits */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, int, s, 8, 16, 0x80); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ ++ /* Use large negative shift amount */ ++ VDUP(vector_shift, , int, s, 8, 8, -10); ++ VDUP(vector_shift, , int, s, 16, 4, -20); ++ VDUP(vector_shift, , int, s, 32, 2, -40); ++ VDUP(vector_shift, , int, s, 64, 
1, -70); ++ VDUP(vector_shift, q, int, s, 8, 16, -10); ++ VDUP(vector_shift, q, int, s, 16, 8, -20); ++ VDUP(vector_shift, q, int, s, 32, 4, -40); ++ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large negative shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ ++ ++ /* Fill input vector with 0, to check saturation in case of large ++ * shift amount */ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ ++ /* Use large shift amount */ ++ VDUP(vector_shift, , int, s, 8, 8, -10); ++ VDUP(vector_shift, , int, s, 16, 4, -20); ++ VDUP(vector_shift, , int, s, 32, 2, -40); ++ VDUP(vector_shift, , int, s, 64, 1, -70); ++ VDUP(vector_shift, q, int, s, 8, 16, -10); ++ VDUP(vector_shift, q, int, s, 16, 8, -20); ++ VDUP(vector_shift, q, int, s, 32, 4, -40); ++ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large 
negative shift amount with 0 input)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqrshl (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_n.c +@@ -0,0 +1,174 @@ ++#include <arm_neon.h> ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results.
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++/* Expected values of cumulative_saturation flag with shift by 3. */ ++int VECT_VAR(expected_cumulative_sat_sh3,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,uint,64,2) = 1; ++ ++/* Expected results with shift by 3. */ ++VECT_VAR_DECL(expected_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_sh3,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++/* Expected values of cumulative_saturation flag with shift by max ++ amount. */ ++int VECT_VAR(expected_cumulative_sat_shmax,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,uint,64,2) = 1; ++ ++/* Expected results with shift by max amount. */ ++VECT_VAR_DECL(expected_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_shmax,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_shmax,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++#define INSN vqrshrn_n ++#define TEST_MSG "VQRSHRN_N" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: y=vqrshrn_n(x,v), then store the result. */ ++#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ ++ VECT_VAR(vector_res, T1, W2, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ /* vector is twice as large as vector_res. 
*/ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); ++ ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); ++ ++ /* Choose shift amount arbitrarily. */ ++#define CMT "" ++ TEST_VQRSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ ++ ++ /* Another set of tests, shifting max value by 3. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++#undef CMT ++#define CMT " (check saturation: shift by 3)" ++ TEST_VQRSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh3, CMT); ++ ++ ++ /* Shift by max amount. 
*/ ++#undef CMT ++#define CMT " (check saturation: shift by max)" ++ TEST_VQRSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_shmax, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqrshrn_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_n.c +@@ -0,0 +1,189 @@ ++#include <arm_neon.h> ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag with negative unput. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; ++ ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by 1. */ ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++ ++/* Expected results with max input value shifted by 1. */ ++VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++ ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by max amount. */ ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; ++ ++/* Expected results with max input value shifted by max amount. */ ++VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++ ++/* Expected values of cumulative_saturation flag with min input value ++ shifted by max amount. */ ++int VECT_VAR(expected_cumulative_sat_min_shmax,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_min_shmax,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_min_shmax,int,64,2) = 1; ++ ++/* Expected results with min input value shifted by max amount.
*/ ++VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with inputs in usual ++ range. */ ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++ ++/* Expected results with inputs in usual range. */ ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49, ++ 0x49, 0x49, 0x49, 0x49 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf }; ++ ++#define INSN vqrshrun_n ++#define TEST_MSG "VQRSHRUN_N" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: y=vqrshrun_n(x,v), then store the result. */ ++#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ ++ VECT_VAR(vector_res, uint, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ ++ VECT_VAR(vector_res, uint, W2, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ /* vector is twice as large as vector_res. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, -2); ++ VDUP(vector, q, int, s, 32, 4, -3); ++ VDUP(vector, q, int, s, 64, 2, -4); ++ ++ /* Choose shift amount arbitrarily. */ ++#define CMT " (negative input)" ++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ ++ /* shift by 1. 
*/
++#undef CMT
++#define CMT " (check cumulative saturation: shift by 1)"
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT);
++
++
++ /* shift by max. */
++#undef CMT
++#define CMT " (check cumulative saturation: shift by max, positive input)"
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT);
++
++
++ /* Fill input vector with min value, to check saturation on limits. */
++ VDUP(vector, q, int, s, 16, 8, 0x8000);
++ VDUP(vector, q, int, s, 32, 4, 0x80000000);
++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
++
++ /* shift by max */
++#undef CMT
++#define CMT " (check cumulative saturation: shift by max, negative input)"
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_min_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_min_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_min_shmax, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT);
++
++
++ /* Fill input vector with positive values, to check normal case. */
++ VDUP(vector, q, int, s, 16, 8, 0x1234);
++ VDUP(vector, q, int, s, 32, 4, 0x87654321);
++ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF);
++
++ /* shift arbitrary amount. */
++#undef CMT
++#define CMT ""
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
++}
++
++int main (void)
++{
++ exec_vqrshrun_n ();
++ return 0;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl.c
+@@ -0,0 +1,829 @@
++#include <arm_neon.h>
++#include "arm-neon-ref.h"
++#include "compute-ref-data.h"
++
++/* Expected values of cumulative_saturation flag with input=0.
*/ ++int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++ ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with input=0 and ++ negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++ ++/* Expected results with input=0 and negative shift amount. 
*/ ++VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, ++ 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with negative shift ++ amount. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++ ++/* Expected results with negative shift amount. 
*/ ++VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc }; ++VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, ++ 0x7a, 0x7a, 0x7b, 0x7b }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; ++VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; ++VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f, ++ 0x1f, 0x1f, 0x1f, 0x1f }; ++VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x7ffff, 0x7ffff, ++ 0x7ffff, 0x7ffff }; ++VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with negative ++ input and large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_neg_large,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,2) = 1; ++ ++/* Expected results with negative input and large shift amount. 
*/ ++VECT_VAR_DECL(expected_neg_large,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large,int,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large,int,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large,int,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg_large,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large,int,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large,int,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with max input ++ and shift by -1. */ ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,2) = 0; ++ ++/* Expected results with max input and shift by -1. 
*/ ++VECT_VAR_DECL(expected_max_minus1,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f }; ++VECT_VAR_DECL(expected_max_minus1,int,16,4) [] = { 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff }; ++VECT_VAR_DECL(expected_max_minus1,int,32,2) [] = { 0x3fffffff, 0x3fffffff }; ++VECT_VAR_DECL(expected_max_minus1,int,64,1) [] = { 0x3fffffffffffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_minus1,uint,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_minus1,uint,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max_minus1,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f }; ++VECT_VAR_DECL(expected_max_minus1,int,16,8) [] = { 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff }; ++VECT_VAR_DECL(expected_max_minus1,int,32,4) [] = { 0x3fffffff, 0x3fffffff, ++ 0x3fffffff, 0x3fffffff }; ++VECT_VAR_DECL(expected_max_minus1,int,64,2) [] = { 0x3fffffffffffffff, ++ 0x3fffffffffffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_minus1,uint,16,8) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_minus1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with max input ++ and large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_max_large,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,64,2) = 1; ++ ++/* Expected results with max input and large shift amount. 
*/ ++VECT_VAR_DECL(expected_max_large,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_large,int,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_large,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_large,int,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_large,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_max_large,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_large,int,16,8) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_large,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_large,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_large,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with saturation ++ on 64-bits values. */ ++int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 1; ++ ++/* Expected results with saturation on 64-bits values.. */ ++VECT_VAR_DECL(expected_64,int,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_64,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++ ++#define INSN vqshl ++#define TEST_MSG "VQSHL/VQSHLQ" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: v3=vqshl(v1,v2), then store the result. */ ++#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector_shift, T3, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); ++ ++ clean_results (); ++ ++ /* Fill input vector with 0, to check saturation on limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); ++ ++ /* Choose init value arbitrarily, will be used as shift amount */ ++ /* Use values equal or one-less-than the type width to check ++ behaviour on limits. */ ++ ++ /* 64-bits vectors first. */ ++ /* Shift 8-bits lanes by 7... */ ++ VDUP(vector_shift, , int, s, 8, 8, 7); ++ /* ... except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */ ++ VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6); ++ VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8); ++ VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9); ++ ++ /* Shift 16-bits lanes by 15... */ ++ VDUP(vector_shift, , int, s, 16, 4, 15); ++ /* ... except: lane 0 (by 14), lane 1 (by 16), and lane 2 (by 17). */ ++ VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14); ++ VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16); ++ VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17); ++ ++ /* Shift 32-bits lanes by 31... */ ++ VDUP(vector_shift, , int, s, 32, 2, 31); ++ /* ... except lane 1 (by 30). */ ++ VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30); ++ ++ /* Shift 64 bits lane by 63. */ ++ VDUP(vector_shift, , int, s, 64, 1, 63); ++ ++ /* 128-bits vectors. */ ++ /* Shift 8-bits lanes by 8. */ ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ /* Shift 16-bits lanes by 16. */ ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ /* Shift 32-bits lanes by 32... */ ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ /* ... except lane 1 (by 33). */ ++ VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33); ++ ++ /* Shift 64-bits lanes by 64... */ ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ /* ... except lane 1 (by 62). 
*/ ++ VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); ++ ++#define CMT " (with input = 0)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ ++ ++ /* Use negative shift amounts */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++#undef CMT ++#define CMT " (input 0 and negative shift amount)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); 
++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); + - #define SIG_ATOMIC_TYPE "int" - - #define INT8_TYPE "signed char" -@@ -43,12 +49,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") - - #define INT_FAST8_TYPE "signed char" --#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") --#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") -+#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") -+#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") - #define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int") - #define UINT_FAST8_TYPE "unsigned char" --#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") --#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") -+#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") -+#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") - #define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") - - #define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") ---- a/src/gcc/config/linux.h -+++ b/src/gcc/config/linux.h -@@ -32,10 +32,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) - #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) - #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) - #else - #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) - #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) - #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (linux_libc == LIBC_MUSL) - #endif - - #define GNU_USER_TARGET_OS_CPP_BUILTINS() \ -@@ -50,21 +54,25 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - } while (0) - - /* Determine which dynamic linker to use depending on whether GLIBC or -- uClibc or Bionic is the default C library and whether -- -muclibc or -mglibc or -mbionic has been passed to change the default. 
*/ -+ uClibc or Bionic or musl is the default C library and whether -+ -muclibc or -mglibc or -mbionic or -mmusl has been passed to change -+ the default. */ - --#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LD1, LD2, LD3) \ -- "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:" LD1 "}}" -+#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LIBC4, LD1, LD2, LD3, LD4) \ -+ "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:%{" LIBC4 ":" LD4 ";:" LD1 "}}}" - - #if DEFAULT_LIBC == LIBC_GLIBC --#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ -- CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", G, U, B) -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", "mmusl", G, U, B, M) - #elif DEFAULT_LIBC == LIBC_UCLIBC --#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ -- CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", U, G, B) -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", "mmusl", U, G, B, M) - #elif DEFAULT_LIBC == LIBC_BIONIC --#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ -- CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", B, G, U) -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", "mmusl", B, G, U, M) -+#elif DEFAULT_LIBC == LIBC_MUSL -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("mmusl", "mglibc", "muclibc", "mbionic", M, G, U, B) - #else - #error "Unsupported DEFAULT_LIBC" - #endif /* DEFAULT_LIBC */ -@@ -81,24 +89,100 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker" - #define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64" - #define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32" -+/* Should be redefined for each target that supports musl. */ -+#define MUSL_DYNAMIC_LINKER "/dev/null" -+#define MUSL_DYNAMIC_LINKER32 "/dev/null" -+#define MUSL_DYNAMIC_LINKER64 "/dev/null" -+#define MUSL_DYNAMIC_LINKERX32 "/dev/null" - - #define GNU_USER_DYNAMIC_LINKER \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \ -- BIONIC_DYNAMIC_LINKER) -+ BIONIC_DYNAMIC_LINKER, MUSL_DYNAMIC_LINKER) - #define GNU_USER_DYNAMIC_LINKER32 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \ -- BIONIC_DYNAMIC_LINKER32) -+ BIONIC_DYNAMIC_LINKER32, MUSL_DYNAMIC_LINKER32) - #define GNU_USER_DYNAMIC_LINKER64 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \ -- BIONIC_DYNAMIC_LINKER64) -+ BIONIC_DYNAMIC_LINKER64, MUSL_DYNAMIC_LINKER64) - #define GNU_USER_DYNAMIC_LINKERX32 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \ -- BIONIC_DYNAMIC_LINKERX32) -+ BIONIC_DYNAMIC_LINKERX32, MUSL_DYNAMIC_LINKERX32) - - /* Whether we have Bionic libc runtime */ - #undef TARGET_HAS_BIONIC - #define TARGET_HAS_BIONIC (OPTION_BIONIC) - -+/* musl avoids problematic includes by rearranging the include directories. -+ * Unfortunately, this is mostly duplicated from cppdefault.c */ -+#if DEFAULT_LIBC == LIBC_MUSL -+#define INCLUDE_DEFAULTS_MUSL_GPP \ -+ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, \ -+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, \ -+ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, \ -+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 }, \ -+ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, \ -+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, ++ /* Test again, with predefined input values. 
*/ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+#ifdef LOCAL_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_LOCAL \ -+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, \ -+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 }, -+#else -+#define INCLUDE_DEFAULTS_MUSL_LOCAL -+#endif ++ /* Choose init value arbitrarily, will be used as shift amount. */ ++ VDUP(vector_shift, , int, s, 8, 8, 1); ++ VDUP(vector_shift, , int, s, 16, 4, 3); ++ VDUP(vector_shift, , int, s, 32, 2, 8); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 12); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); + -+#ifdef PREFIX_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_PREFIX \ -+ { PREFIX_INCLUDE_DIR, 0, 0, 1, 0, 0}, -+#else -+#define INCLUDE_DEFAULTS_MUSL_PREFIX -+#endif ++#undef CMT ++#define CMT "" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); + -+#ifdef CROSS_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_CROSS \ -+ { CROSS_INCLUDE_DIR, "GCC", 0, 0, 0, 0}, -+#else -+#define INCLUDE_DEFAULTS_MUSL_CROSS -+#endif ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); + -+#ifdef TOOL_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_TOOL \ -+ { TOOL_INCLUDE_DIR, "BINUTILS", 0, 1, 0, 0}, -+#else -+#define INCLUDE_DEFAULTS_MUSL_TOOL -+#endif + -+#ifdef NATIVE_SYSTEM_HEADER_DIR -+#define INCLUDE_DEFAULTS_MUSL_NATIVE \ -+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 2 }, \ -+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 0 }, -+#else -+#define INCLUDE_DEFAULTS_MUSL_NATIVE -+#endif ++ /* Use negative shift amounts */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ 
VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); + -+#if defined (CROSS_DIRECTORY_STRUCTURE) && !defined (TARGET_SYSTEM_ROOT) -+# undef INCLUDE_DEFAULTS_MUSL_LOCAL -+# define INCLUDE_DEFAULTS_MUSL_LOCAL -+# undef INCLUDE_DEFAULTS_MUSL_NATIVE -+# define INCLUDE_DEFAULTS_MUSL_NATIVE -+#else -+# undef INCLUDE_DEFAULTS_MUSL_CROSS -+# define INCLUDE_DEFAULTS_MUSL_CROSS -+#endif ++#undef CMT ++#define CMT " (negative shift amount)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); + -+#undef INCLUDE_DEFAULTS -+#define INCLUDE_DEFAULTS \ -+ { \ -+ INCLUDE_DEFAULTS_MUSL_GPP \ -+ INCLUDE_DEFAULTS_MUSL_PREFIX \ -+ INCLUDE_DEFAULTS_MUSL_CROSS \ -+ INCLUDE_DEFAULTS_MUSL_TOOL \ -+ INCLUDE_DEFAULTS_MUSL_NATIVE \ -+ { GCC_INCLUDE_DIR, "GCC", 0, 1, 0, 0 }, \ -+ { 0, 0, 0, 0, 0, 0 } \ -+ } -+#endif + - #if (DEFAULT_LIBC == LIBC_UCLIBC) && defined (SINGLE_LIBC) /* uClinux */ - /* This is a *uclinux* target. 
We don't define below macros to normal linux - versions, because doing so would require *uclinux* targets to include ---- a/src/gcc/config/linux.opt -+++ b/src/gcc/config/linux.opt -@@ -28,5 +28,9 @@ Target Report RejectNegative Var(linux_libc,LIBC_GLIBC) Negative(muclibc) - Use GNU C library - - muclibc --Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mbionic) -+Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mmusl) - Use uClibc C library ++ /* Use large shift amounts. */ ++ VDUP(vector_shift, , int, s, 8, 8, 8); ++ VDUP(vector_shift, , int, s, 16, 4, 16); ++ VDUP(vector_shift, , int, s, 32, 2, 32); ++ VDUP(vector_shift, , int, s, 64, 1, 64); ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); + -+mmusl -+Target Report RejectNegative Var(linux_libc,LIBC_MUSL) Negative(mbionic) -+Use musl C library ---- a/src/gcc/config/mips/linux.h -+++ b/src/gcc/config/mips/linux.h -@@ -37,7 +37,13 @@ along with GCC; see the file COPYING3. If not see - #define UCLIBC_DYNAMIC_LINKERN32 \ - "%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}" - -+#undef MUSL_DYNAMIC_LINKER32 -+#define MUSL_DYNAMIC_LINKER32 "/lib/ld-musl-mips%{EL:el}%{msoft-float:-sf}.so.1" -+#undef MUSL_DYNAMIC_LINKER64 -+#define MUSL_DYNAMIC_LINKER64 "/lib/ld-musl-mips64%{EL:el}%{msoft-float:-sf}.so.1" -+#define MUSL_DYNAMIC_LINKERN32 "/lib/ld-musl-mipsn32%{EL:el}%{msoft-float:-sf}.so.1" ++#undef CMT ++#define CMT " (large shift amount, negative input)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large, CMT); + - #define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32" - #define GNU_USER_DYNAMIC_LINKERN32 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \ -- BIONIC_DYNAMIC_LINKERN32) -+ BIONIC_DYNAMIC_LINKERN32, MUSL_DYNAMIC_LINKERN32) ---- a/src/gcc/config/rs6000/linux.h -+++ b/src/gcc/config/rs6000/linux.h -@@ -30,10 +30,14 @@ - #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) - #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) - #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) - #else - #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) - #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) - 
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (linux_libc == LIBC_MUSL) - #endif - - /* Determine what functions are present at the runtime; ---- a/src/gcc/config/rs6000/linux64.h -+++ b/src/gcc/config/rs6000/linux64.h -@@ -299,10 +299,14 @@ extern int dot_symbols; - #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) - #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) - #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) - #else - #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) - #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) - #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (linux_libc == LIBC_MUSL) - #endif - - /* Determine what functions are present at the runtime; ---- a/src/gcc/configure -+++ b/src/gcc/configure -@@ -1699,7 +1699,8 @@ Optional Packages: - use sysroot as the system root during the build - --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR - --with-specs=SPECS add SPECS to driver command-line processing -- --with-pkgversion=PKG Use PKG in the version string in place of "GCC" -+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro -+ GCC `cat $srcdir/LINARO-VERSION`" - --with-bugurl=URL Direct users to URL to report a bug - --with-multilib-list select multilibs (AArch64, SH and x86-64 only) - --with-gnu-ld assume the C compiler uses GNU ld default=no -@@ -7362,7 +7363,7 @@ if test "${with_pkgversion+set}" = set; then : - *) PKGVERSION="($withval) " ;; - esac - else -- PKGVERSION="(GCC) " -+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " - - fi - -@@ -18162,7 +18163,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18165 "configure" -+#line 18166 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -18268,7 +18269,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18271 "configure" -+#line 18272 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -27742,6 +27743,9 @@ if test "${gcc_cv_libc_provides_ssp+set}" = set; then : - else - gcc_cv_libc_provides_ssp=no - case "$target" in -+ *-*-musl*) -+ # All versions of musl provide stack protector -+ gcc_cv_libc_provides_ssp=yes;; - *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) - # glibc 2.4 and later provides __stack_chk_fail and - # either __stack_chk_guard, or TLS access to stack guard canary. -@@ -27774,6 +27778,7 @@ fi - # ) and for now - # simply assert that glibc does provide this, which is true for all - # realistically usable GNU/Hurd configurations. 
-+ # All supported versions of musl provide it as well - gcc_cv_libc_provides_ssp=yes;; - *-*-darwin* | *-*-freebsd*) - ac_fn_c_check_func "$LINENO" "__stack_chk_fail" "ac_cv_func___stack_chk_fail" -@@ -27870,6 +27875,9 @@ case "$target" in - gcc_cv_target_dl_iterate_phdr=no - fi - ;; -+ *-linux-musl*) -+ gcc_cv_target_dl_iterate_phdr=yes -+ ;; - esac - - if test x$gcc_cv_target_dl_iterate_phdr = xyes; then ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -862,7 +862,7 @@ AC_ARG_WITH(specs, - ) - AC_SUBST(CONFIGURE_SPECS) - --ACX_PKGVERSION([GCC]) -+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) - ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - - # Sanity check enable_languages in case someone does not run the toplevel -@@ -5229,6 +5229,9 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, - gcc_cv_libc_provides_ssp, - [gcc_cv_libc_provides_ssp=no - case "$target" in -+ *-*-musl*) -+ # All versions of musl provide stack protector -+ gcc_cv_libc_provides_ssp=yes;; - *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) - # glibc 2.4 and later provides __stack_chk_fail and - # either __stack_chk_guard, or TLS access to stack guard canary. -@@ -5255,6 +5258,7 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, - # ) and for now - # simply assert that glibc does provide this, which is true for all - # realistically usable GNU/Hurd configurations. -+ # All supported versions of musl provide it as well - gcc_cv_libc_provides_ssp=yes;; - *-*-darwin* | *-*-freebsd*) - AC_CHECK_FUNC(__stack_chk_fail,[gcc_cv_libc_provides_ssp=yes], -@@ -5328,6 +5332,9 @@ case "$target" in - gcc_cv_target_dl_iterate_phdr=no - fi - ;; -+ *-linux-musl*) -+ gcc_cv_target_dl_iterate_phdr=yes -+ ;; - esac - GCC_TARGET_TEMPLATE([TARGET_DL_ITERATE_PHDR]) - if test x$gcc_cv_target_dl_iterate_phdr = xyes; then ---- a/src/gcc/cp/Make-lang.in -+++ b/src/gcc/cp/Make-lang.in -@@ -155,7 +155,7 @@ check-c++-subtargets : check-g++-subtargets - # List of targets that can use the generic check- rule and its // variant. - lang_checks += check-g++ - lang_checks_parallelized += check-g++ --# For description see comment above check_gcc_parallelize in gcc/Makefile.in. -+# For description see the check_$lang_parallelize comment in gcc/Makefile.in. - check_g++_parallelize = 10000 - # - # Install hooks: -@@ -221,6 +221,7 @@ c++.mostlyclean: - -rm -f doc/g++.1 - -rm -f cp/*$(objext) - -rm -f cp/*$(coverageexts) -+ -rm -f xg++$(exeext) g++-cross$(exeext) cc1plus$(exeext) - c++.clean: - c++.distclean: - -rm -f cp/config.status cp/Makefile ---- a/src/gcc/cppbuiltin.c -+++ b/src/gcc/cppbuiltin.c -@@ -62,18 +62,41 @@ parse_basever (int *major, int *minor, int *patchlevel) - *patchlevel = s_patchlevel; - } - -+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]" -+ to create Linaro release number YYYYMM and spin version. 
*/ -+static void -+parse_linarover (int *release, int *spin) -+{ -+ static int s_year = -1, s_month, s_spin; ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on limits */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + -+ if (s_year == -1) -+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3) -+ { -+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month); -+ s_spin = 0; -+ } ++ /* Shift by -1 */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -1); ++ VDUP(vector_shift, , int, s, 32, 2, -1); ++ VDUP(vector_shift, , int, s, 64, 1, -1); ++ VDUP(vector_shift, q, int, s, 8, 16, -1); ++ VDUP(vector_shift, q, int, s, 16, 8, -1); ++ VDUP(vector_shift, q, int, s, 32, 4, -1); ++ VDUP(vector_shift, q, int, s, 64, 2, -1); + -+ if (release) -+ *release = s_year * 100 + s_month; ++#undef CMT ++#define CMT " (max input, shift by -1)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, 
expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_minus1, CMT); + -+ if (spin) -+ *spin = s_spin; -+} - - /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */ - static void - define__GNUC__ (cpp_reader *pfile) - { -- int major, minor, patchlevel; -+ int major, minor, patchlevel, linaro_release, linaro_spin; - - parse_basever (&major, &minor, &patchlevel); -+ parse_linarover (&linaro_release, &linaro_spin); - cpp_define_formatted (pfile, "__GNUC__=%d", major); - cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); - cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); - cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); -+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release); -+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin); - cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); - cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); - cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); ---- a/src/gcc/cprop.c -+++ b/src/gcc/cprop.c -@@ -285,6 +285,15 @@ cprop_constant_p (const_rtx x) - return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x)); - } - -+/* Determine whether the rtx X should be treated as a register that can -+ be propagated. Any pseudo-register is fine. */ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_minus1, CMT); + -+static bool -+cprop_reg_p (const_rtx x) -+{ -+ return REG_P (x) && !HARD_REGISTER_P (x); -+} + - /* Scan SET present in INSN and add an entry to the hash TABLE. - IMPLICIT is true if it's an implicit set, false otherwise. */ - -@@ -295,8 +304,7 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, - rtx src = SET_SRC (set); - rtx dest = SET_DEST (set); - -- if (REG_P (dest) -- && ! HARD_REGISTER_P (dest) -+ if (cprop_reg_p (dest) - && reg_available_p (dest, insn) - && can_copy_p (GET_MODE (dest))) - { -@@ -321,9 +329,8 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, - src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src); - - /* Record sets for constant/copy propagation. */ -- if ((REG_P (src) -+ if ((cprop_reg_p (src) - && src != dest -- && ! 
HARD_REGISTER_P (src) - && reg_available_p (src, insn)) - || cprop_constant_p (src)) - insert_set_in_table (dest, src, insn, table, implicit); -@@ -821,15 +828,15 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) - return success; - } - --/* Find a set of REGNOs that are available on entry to INSN's block. Return -- NULL no such set is found. */ -+/* Find a set of REGNOs that are available on entry to INSN's block. If found, -+ SET_RET[0] will be assigned a set with a register source and SET_RET[1] a -+ set with a constant source. If not found the corresponding entry is set to -+ NULL. */ - --static struct cprop_expr * --find_avail_set (int regno, rtx_insn *insn) -+static void -+find_avail_set (int regno, rtx_insn *insn, struct cprop_expr *set_ret[2]) - { -- /* SET1 contains the last set found that can be returned to the caller for -- use in a substitution. */ -- struct cprop_expr *set1 = 0; -+ set_ret[0] = set_ret[1] = NULL; - - /* Loops are not possible here. To get a loop we would need two sets - available at the start of the block containing INSN. i.e. we would -@@ -869,8 +876,10 @@ find_avail_set (int regno, rtx_insn *insn) - If the source operand changed, we may still use it for the next - iteration of this loop, but we may not use it for substitutions. */ - -- if (cprop_constant_p (src) || reg_not_set_p (src, insn)) -- set1 = set; -+ if (cprop_constant_p (src)) -+ set_ret[1] = set; -+ else if (reg_not_set_p (src, insn)) -+ set_ret[0] = set; - - /* If the source of the set is anything except a register, then - we have reached the end of the copy chain. */ -@@ -881,10 +890,6 @@ find_avail_set (int regno, rtx_insn *insn) - and see if we have an available copy into SRC. */ - regno = REGNO (src); - } -- -- /* SET1 holds the last set that was available and anticipatable at -- INSN. */ -- return set1; - } - - /* Subroutine of cprop_insn that tries to propagate constants into -@@ -1050,40 +1055,40 @@ cprop_insn (rtx_insn *insn) - int changed = 0, changed_this_round; - rtx note; - --retry: -- changed_this_round = 0; -- reg_use_count = 0; -- note_uses (&PATTERN (insn), find_used_regs, NULL); -- -- /* We may win even when propagating constants into notes. */ -- note = find_reg_equal_equiv_note (insn); -- if (note) -- find_used_regs (&XEXP (note, 0), NULL); -- -- for (i = 0; i < reg_use_count; i++) -+ do - { -- rtx reg_used = reg_use_table[i]; -- unsigned int regno = REGNO (reg_used); -- rtx src; -- struct cprop_expr *set; -+ changed_this_round = 0; -+ reg_use_count = 0; -+ note_uses (&PATTERN (insn), find_used_regs, NULL); - -- /* If the register has already been set in this block, there's -- nothing we can do. */ -- if (! reg_not_set_p (reg_used, insn)) -- continue; -+ /* We may win even when propagating constants into notes. */ -+ note = find_reg_equal_equiv_note (insn); -+ if (note) -+ find_used_regs (&XEXP (note, 0), NULL); - -- /* Find an assignment that sets reg_used and is available -- at the start of the block. */ -- set = find_avail_set (regno, insn); -- if (! set) -- continue; -+ for (i = 0; i < reg_use_count; i++) -+ { -+ rtx reg_used = reg_use_table[i]; -+ unsigned int regno = REGNO (reg_used); -+ rtx src_cst = NULL, src_reg = NULL; -+ struct cprop_expr *set[2]; - -- src = set->src; -+ /* If the register has already been set in this block, there's -+ nothing we can do. */ -+ if (! reg_not_set_p (reg_used, insn)) -+ continue; - -- /* Constant propagation. 
*/ -- if (cprop_constant_p (src)) -- { -- if (constprop_register (reg_used, src, insn)) -+ /* Find an assignment that sets reg_used and is available -+ at the start of the block. */ -+ find_avail_set (regno, insn, set); -+ if (set[0]) -+ src_reg = set[0]->src; -+ if (set[1]) -+ src_cst = set[1]->src; ++ /* Use large shift amounts */ ++ VDUP(vector_shift, , int, s, 8, 8, 8); ++ VDUP(vector_shift, , int, s, 16, 4, 16); ++ VDUP(vector_shift, , int, s, 32, 2, 32); ++ VDUP(vector_shift, , int, s, 64, 1, 64); ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ ++#undef CMT ++#define CMT " (max input, large shift amount)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_large, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_large, CMT); + -+ /* Constant propagation. 
*/ -+ if (src_cst && cprop_constant_p (src_cst) -+ && constprop_register (reg_used, src_cst, insn)) - { - changed_this_round = changed = 1; - global_const_prop_count++; -@@ -1093,18 +1098,16 @@ retry: - "GLOBAL CONST-PROP: Replacing reg %d in ", regno); - fprintf (dump_file, "insn %d with constant ", - INSN_UID (insn)); -- print_rtl (dump_file, src); -+ print_rtl (dump_file, src_cst); - fprintf (dump_file, "\n"); - } - if (insn->deleted ()) - return 1; - } -- } -- else if (REG_P (src) -- && REGNO (src) >= FIRST_PSEUDO_REGISTER -- && REGNO (src) != regno) -- { -- if (try_replace_reg (reg_used, src, insn)) -+ /* Copy propagation. */ -+ else if (src_reg && cprop_reg_p (src_reg) -+ && REGNO (src_reg) != regno -+ && try_replace_reg (reg_used, src_reg, insn)) - { - changed_this_round = changed = 1; - global_copy_prop_count++; -@@ -1113,7 +1116,7 @@ retry: - fprintf (dump_file, - "GLOBAL COPY-PROP: Replacing reg %d in insn %d", - regno, INSN_UID (insn)); -- fprintf (dump_file, " with reg %d\n", REGNO (src)); -+ fprintf (dump_file, " with reg %d\n", REGNO (src_reg)); - } - - /* The original insn setting reg_used may or may not now be -@@ -1123,12 +1126,10 @@ retry: - and made things worse. */ - } - } -- -- /* If try_replace_reg simplified the insn, the regs found -- by find_used_regs may not be valid anymore. Start over. */ -- if (changed_this_round) -- goto retry; - } -+ /* If try_replace_reg simplified the insn, the regs found by find_used_regs -+ may not be valid anymore. Start over. */ -+ while (changed_this_round); - - if (changed && DEBUG_INSN_P (insn)) - return 0; -@@ -1191,7 +1192,7 @@ do_local_cprop (rtx x, rtx_insn *insn) - /* Rule out USE instructions and ASM statements as we don't want to - change the hard registers mentioned. */ - if (REG_P (x) -- && (REGNO (x) >= FIRST_PSEUDO_REGISTER -+ && (cprop_reg_p (x) - || (GET_CODE (PATTERN (insn)) != USE - && asm_noperands (PATTERN (insn)) < 0))) - { -@@ -1207,7 +1208,7 @@ do_local_cprop (rtx x, rtx_insn *insn) - - if (cprop_constant_p (this_rtx)) - newcnst = this_rtx; -- if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER -+ if (cprop_reg_p (this_rtx) - /* Don't copy propagate if it has attached REG_EQUIV note. - At this point this only function parameters should have - REG_EQUIV notes and if the argument slot is used somewhere -@@ -1328,9 +1329,8 @@ implicit_set_cond_p (const_rtx cond) - if (GET_CODE (cond) != EQ && GET_CODE (cond) != NE) - return false; - -- /* The first operand of COND must be a pseudo-reg. */ -- if (! REG_P (XEXP (cond, 0)) -- || HARD_REGISTER_P (XEXP (cond, 0))) -+ /* The first operand of COND must be a register we can propagate. */ -+ if (!cprop_reg_p (XEXP (cond, 0))) - return false; - - /* The second operand of COND must be a suitable constant. */ ---- a/src/gcc/df-core.c -+++ b/src/gcc/df-core.c -@@ -642,7 +642,6 @@ void - df_finish_pass (bool verify ATTRIBUTE_UNUSED) - { - int i; -- int removed = 0; - - #ifdef ENABLE_DF_CHECKING - int saved_flags; -@@ -658,21 +657,15 @@ df_finish_pass (bool verify ATTRIBUTE_UNUSED) - saved_flags = df->changeable_flags; - #endif - -- for (i = 0; i < df->num_problems_defined; i++) -+ /* We iterate over problems by index as each problem removed will -+ lead to problems_in_order to be reordered. 
*/ -+ for (i = 0; i < DF_LAST_PROBLEM_PLUS1; i++) - { -- struct dataflow *dflow = df->problems_in_order[i]; -- struct df_problem *problem = dflow->problem; -+ struct dataflow *dflow = df->problems_by_index[i]; - -- if (dflow->optional_p) -- { -- gcc_assert (problem->remove_problem_fun); -- (problem->remove_problem_fun) (); -- df->problems_in_order[i] = NULL; -- df->problems_by_index[problem->id] = NULL; -- removed++; -- } -+ if (dflow && dflow->optional_p) -+ df_remove_problem (dflow); - } -- df->num_problems_defined -= removed; - - /* Clear all of the flags. */ - df->changeable_flags = 0; ---- a/src/gcc/fortran/Make-lang.in -+++ b/src/gcc/fortran/Make-lang.in -@@ -167,7 +167,7 @@ check-f95-subtargets : check-gfortran-subtargets - check-fortran-subtargets : check-gfortran-subtargets - lang_checks += check-gfortran - lang_checks_parallelized += check-gfortran --# For description see comment above check_gcc_parallelize in gcc/Makefile.in. -+# For description see the check_$lang_parallelize comment in gcc/Makefile.in. - check_gfortran_parallelize = 10000 - - # GFORTRAN documentation. -@@ -275,7 +275,7 @@ fortran.uninstall: - # We just have to delete files specific to us. - - fortran.mostlyclean: -- -rm -f f951$(exeext) -+ -rm -f gfortran$(exeext) gfortran-cross$(exeext) f951$(exeext) - -rm -f fortran/*.o - - fortran.clean: ---- a/src/gcc/genpreds.c -+++ b/src/gcc/genpreds.c -@@ -640,12 +640,14 @@ struct constraint_data - const char *regclass; /* for register constraints */ - rtx exp; /* for other constraints */ - unsigned int lineno; /* line of definition */ -- unsigned int is_register : 1; -- unsigned int is_const_int : 1; -- unsigned int is_const_dbl : 1; -- unsigned int is_extra : 1; -- unsigned int is_memory : 1; -- unsigned int is_address : 1; -+ unsigned int is_register : 1; -+ unsigned int is_const_int : 1; -+ unsigned int is_const_dbl : 1; -+ unsigned int is_extra : 1; -+ unsigned int is_memory : 1; -+ unsigned int is_address : 1; -+ unsigned int maybe_allows_reg : 1; -+ unsigned int maybe_allows_mem : 1; - }; - - /* Overview of all constraints beginning with a given letter. */ -@@ -691,6 +693,9 @@ static unsigned int satisfied_start; - static unsigned int const_int_start, const_int_end; - static unsigned int memory_start, memory_end; - static unsigned int address_start, address_end; -+static unsigned int maybe_allows_none_start, maybe_allows_none_end; -+static unsigned int maybe_allows_reg_start, maybe_allows_reg_end; -+static unsigned int maybe_allows_mem_start, maybe_allows_mem_end; - - /* Convert NAME, which contains angle brackets and/or underscores, to - a string that can be used as part of a C identifier. The string -@@ -711,6 +716,34 @@ mangle (const char *name) - return XOBFINISH (rtl_obstack, const char *); - } - -+/* Return a bitmask, bit 1 if EXP maybe allows a REG/SUBREG, 2 if EXP -+ maybe allows a MEM. Bits should be clear only when we are sure it -+ will not allow a REG/SUBREG or a MEM. */ -+static int -+compute_maybe_allows (rtx exp) ++ ++ /* Check 64 bits saturation. 
*/ ++ VDUP(vector, , int, s, 64, 1, -10); ++ VDUP(vector_shift, , int, s, 64, 1, 64); ++ VDUP(vector, q, int, s, 64, 2, 10); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ ++#undef CMT ++#define CMT " (check saturation on 64 bits)" ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_64, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_64, CMT); ++ ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, CMT); ++} ++ ++int main (void) +{ -+ switch (GET_CODE (exp)) -+ { -+ case IF_THEN_ELSE: -+ /* Conservative answer is like IOR, of the THEN and ELSE branches. */ -+ return compute_maybe_allows (XEXP (exp, 1)) -+ | compute_maybe_allows (XEXP (exp, 2)); -+ case AND: -+ return compute_maybe_allows (XEXP (exp, 0)) -+ & compute_maybe_allows (XEXP (exp, 1)); -+ case IOR: -+ return compute_maybe_allows (XEXP (exp, 0)) -+ | compute_maybe_allows (XEXP (exp, 1)); -+ case MATCH_CODE: -+ if (*XSTR (exp, 1) == '\0') -+ return (strstr (XSTR (exp, 0), "reg") != NULL ? 1 : 0) -+ | (strstr (XSTR (exp, 0), "mem") != NULL ? 2 : 0); -+ /* FALLTHRU */ -+ default: -+ return 3; -+ } ++ exec_vqshl (); ++ return 0; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl_n.c +@@ -0,0 +1,234 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc, ++ 0xe0, 0xe4, 0xe8, 0xec, ++ 0xf0, 0xf4, 0xf8, 0xfc }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, ++ 0xffe8, 0xffea, 0xffec, 0xffee }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, ++ 0xffffffe4, 0xffffffe6 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; + - /* Add one constraint, of any sort, to the tables. NAME is its name; - REGCLASS is the register class, if any; EXP is the expression to - test, if any; IS_MEMORY and IS_ADDRESS indicate memory and address -@@ -866,6 +899,11 @@ add_constraint (const char *name, const char *regclass, - c->is_extra = !(regclass || is_const_int || is_const_dbl); - c->is_memory = is_memory; - c->is_address = is_address; -+ int maybe_allows = 3; -+ if (exp) -+ maybe_allows = compute_maybe_allows (exp); -+ c->maybe_allows_reg = (maybe_allows & 1) != 0; -+ c->maybe_allows_mem = (maybe_allows & 2) != 0; - - c->next_this_letter = *slot; - *slot = c; -@@ -940,8 +978,30 @@ choose_enum_order (void) - enum_order[next++] = c; - address_end = next; - -+ maybe_allows_none_start = next; -+ FOR_ALL_CONSTRAINTS (c) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && !c->maybe_allows_reg && !c->maybe_allows_mem) -+ enum_order[next++] = c; -+ maybe_allows_none_end = next; ++/* Expected values of cumulative_saturation flag with max positive input. 
*/ ++int VECT_VAR(expected_cumulative_sat_max,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,64,2) = 1; + -+ maybe_allows_reg_start = next; -+ FOR_ALL_CONSTRAINTS (c) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && c->maybe_allows_reg && !c->maybe_allows_mem) -+ enum_order[next++] = c; -+ maybe_allows_reg_end = next; ++/* Expected results with max positive input. */ ++VECT_VAR_DECL(expected_max,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max,int,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_max,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; + -+ maybe_allows_mem_start = next; -+ FOR_ALL_CONSTRAINTS (c) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && !c->maybe_allows_reg && c->maybe_allows_mem) -+ enum_order[next++] = c; -+ maybe_allows_mem_end = next; ++#define INSN vqshl ++#define TEST_MSG "VQSHL_N/VQSHLQ_N" + - FOR_ALL_CONSTRAINTS (c) -- if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && c->maybe_allows_reg && c->maybe_allows_mem) - enum_order[next++] = c; - gcc_assert (next == num_constraints); - } -@@ -1229,6 +1289,41 @@ write_range_function (const char *name, unsigned int start, unsigned int end) - "}\n\n", name); - } - -+/* Write a 
definition for insn_extra_constraint_allows_reg_mem function. */ -+static void -+write_allows_reg_mem_function (void) ++#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) +{ -+ printf ("static inline void\n" -+ "insn_extra_constraint_allows_reg_mem (enum constraint_num c,\n" -+ "\t\t\t\t bool *allows_reg, bool *allows_mem)\n" -+ "{\n"); -+ if (maybe_allows_none_start != maybe_allows_none_end) -+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" -+ " return;\n", -+ enum_order[maybe_allows_none_start]->c_name, -+ enum_order[maybe_allows_none_end - 1]->c_name); -+ if (maybe_allows_reg_start != maybe_allows_reg_end) -+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" -+ " {\n" -+ " *allows_reg = true;\n" -+ " return;\n" -+ " }\n", -+ enum_order[maybe_allows_reg_start]->c_name, -+ enum_order[maybe_allows_reg_end - 1]->c_name); -+ if (maybe_allows_mem_start != maybe_allows_mem_end) -+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" -+ " {\n" -+ " *allows_mem = true;\n" -+ " return;\n" -+ " }\n", -+ enum_order[maybe_allows_mem_start]->c_name, -+ enum_order[maybe_allows_mem_end - 1]->c_name); -+ printf (" (void) c;\n" -+ " *allows_reg = true;\n" -+ " *allows_mem = true;\n" -+ "}\n\n"); -+} ++ /* Basic test: v2=vqshl_n(v1,v), then store the result. */ ++#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQSHL_N(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ clean_results (); ++ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose shift amount arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat, CMT); ++ ++ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++#undef CMT ++#define CMT " (with max input)" ++ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat_max, CMT); ++ ++ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat_max, CMT); + - /* VEC is a list of key/value pairs, with the keys being lower bounds - of a range. Output a decision tree that handles the keys covered by - [VEC[START], VEC[END]), returning FALLBACK for keys lower then VEC[START]'s. -@@ -1326,6 +1421,7 @@ write_tm_preds_h (void) - memory_start, memory_end); - write_range_function ("insn_extra_address_constraint", - address_start, address_end); -+ write_allows_reg_mem_function (); - - if (constraint_max_namelen > 1) - { ---- a/src/gcc/go/Make-lang.in -+++ b/src/gcc/go/Make-lang.in -@@ -197,6 +197,7 @@ go.uninstall: - go.mostlyclean: - -rm -f go/*$(objext) - -rm -f go/*$(coverageexts) -+ -rm -f gccgo$(exeext) gccgo-cross$(exeext) go1$(exeext) - go.clean: - go.distclean: - go.maintainer-clean: ---- a/src/gcc/ira-costs.c -+++ b/src/gcc/ira-costs.c -@@ -1380,8 +1380,6 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref) - rtx dest = SET_DEST (set); - rtx src = SET_SRC (set); - -- dest = SET_DEST (set); -- src = SET_SRC (set); - if (GET_CODE (dest) == SUBREG - && (GET_MODE_SIZE (GET_MODE (dest)) - == GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))) ---- a/src/gcc/jit/Make-lang.in -+++ b/src/gcc/jit/Make-lang.in -@@ -285,6 +285,10 @@ jit.uninstall: - # We just have to delete files specific to us. 
- - jit.mostlyclean: -+ -rm -f $(LIBGCCJIT_FILENAME) $(LIBGCCJIT_SYMLINK) -+ -rm -f $(LIBGCCJIT_LINKER_NAME_SYMLINK) $(FULL_DRIVER_NAME) -+ -rm -f $(LIBGCCJIT_SONAME) -+ -rm -f $(jit_OBJS) - - jit.clean: - ---- a/src/gcc/loop-invariant.c -+++ b/src/gcc/loop-invariant.c -@@ -740,8 +740,11 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on, - enough to not regress 410.bwaves either (by still moving reg+reg - invariants). - See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */ -- inv->cheap_address = address_cost (SET_SRC (set), word_mode, -- ADDR_SPACE_GENERIC, speed) < 3; -+ if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set)))) -+ inv->cheap_address = address_cost (SET_SRC (set), word_mode, -+ ADDR_SPACE_GENERIC, speed) < 3; -+ else -+ inv->cheap_address = false; - } - else - { -@@ -1174,6 +1177,7 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed, - } - - if (!inv->cheap_address -+ || inv->def->n_uses == 0 - || inv->def->n_addr_uses < inv->def->n_uses) - (*comp_cost) += inv->cost * inv->eqno; - -@@ -1512,6 +1516,79 @@ replace_uses (struct invariant *inv, rtx reg, bool in_group) - return 1; - } - -+/* Whether invariant INV setting REG can be moved out of LOOP, at the end of -+ the block preceding its header. */ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max, CMT); ++} + -+static bool -+can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg) ++int main (void) +{ -+ df_ref def, use; -+ unsigned int dest_regno, defs_in_loop_count = 0; -+ rtx_insn *insn = inv->insn; -+ basic_block bb = BLOCK_FOR_INSN (inv->insn); ++ exec_vqshl_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshlu_n.c +@@ -0,0 +1,263 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* We ignore hard register and memory access for cost and complexity reasons. -+ Hard register are few at this stage and expensive to consider as they -+ require building a separate data flow. Memory access would require using -+ df_simulate_* and can_move_insns_across functions and is more complex. */ -+ if (!REG_P (reg) || HARD_REGISTER_P (reg)) -+ return false; ++/* Expected values of cumulative_saturation flag with negative ++ input. 
*/ ++int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; + -+ /* Check whether the set is always executed. We could omit this condition if -+ we know that the register is unused outside of the loop, but it does not -+ seem worth finding out. */ -+ if (!inv->always_executed) -+ return false; ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x0, 0x0 }; + -+ /* Check that all uses that would be dominated by def are already dominated -+ by it. */ -+ dest_regno = REGNO (reg); -+ for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use)) -+ { -+ rtx_insn *use_insn; -+ basic_block use_bb; ++/* Expected values of cumulative_saturation flag with shift by 1. */ ++int VECT_VAR(expected_cumulative_sat_sh1,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,64,2) = 0; + -+ use_insn = DF_REF_INSN (use); -+ use_bb = BLOCK_FOR_INSN (use_insn); ++/* Expected results with shift by 1. */ ++VECT_VAR_DECL(expected_sh1,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected_sh1,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected_sh1,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_sh1,uint,64,1) [] = { 0xfffffffffffffffe }; ++VECT_VAR_DECL(expected_sh1,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected_sh1,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe, ++ 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected_sh1,uint,32,4) [] = { 0xfffffffe, 0xfffffffe, ++ 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_sh1,uint,64,2) [] = { 0xfffffffffffffffe, ++ 0xfffffffffffffffe }; + -+ /* Ignore instruction considered for moving. */ -+ if (use_insn == insn) -+ continue; ++/* Expected values of cumulative_saturation flag with shift by 2. 
*/ ++int VECT_VAR(expected_cumulative_sat_sh2,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,64,2) = 1; + -+ /* Don't consider uses outside loop. */ -+ if (!flow_bb_inside_loop_p (loop, use_bb)) -+ continue; ++/* Expected results with shift by 2. */ ++VECT_VAR_DECL(expected_sh2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_sh2,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_sh2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_sh2,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_sh2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_sh2,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_sh2,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_sh2,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; + -+ /* Don't move if a use is not dominated by def in insn. */ -+ if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn)) -+ return false; -+ if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb)) -+ return false; -+ } ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + -+ /* Check for other defs. Any other def in the loop might reach a use -+ currently reached by the def in insn. */ -+ for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def)) -+ { -+ basic_block def_bb = DF_REF_BB (def); ++/* Expected results. */ ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0, ++ 0xa0, 0xa0, 0xa0, 0xa0, ++ 0xa0, 0xa0, 0xa0, 0xa0, ++ 0xa0, 0xa0, 0xa0, 0xa0 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180, ++ 0x180, 0x180, 0x180, 0x180 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 }; + -+ /* Defs in exit block cannot reach a use they weren't already. 
*/ -+ if (single_succ_p (def_bb)) -+ { -+ basic_block def_bb_succ; + -+ def_bb_succ = single_succ (def_bb); -+ if (!flow_bb_inside_loop_p (loop, def_bb_succ)) -+ continue; -+ } ++#define INSN vqshlu ++#define TEST_MSG "VQSHLU_N/VQSHLUQ_N" + -+ if (++defs_in_loop_count > 1) -+ return false; -+ } ++#define FNNAME1(NAME) void exec_ ## NAME ## _n(void) ++#define FNNAME(NAME) FNNAME1(NAME) + -+ return true; -+} ++FNNAME (INSN) ++{ ++ /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ ++#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \ ++ VECT_VAR(vector_res, T3, W, N) = \ ++ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ ++ VECT_VAR(vector_res, T3, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + - /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false - otherwise. */ - -@@ -1545,11 +1622,8 @@ move_invariant_reg (struct loop *loop, unsigned invno) - } - } - -- /* Move the set out of the loop. If the set is always executed (we could -- omit this condition if we know that the register is unused outside of -- the loop, but it does not seem worth finding out) and it has no uses -- that would not be dominated by it, we may just move it (TODO). -- Otherwise we need to create a temporary register. */ -+ /* If possible, just move the set out of the loop. Otherwise, we -+ need to create a temporary register. */ - set = single_set (inv->insn); - reg = dest = SET_DEST (set); - if (GET_CODE (reg) == SUBREG) -@@ -1557,19 +1631,25 @@ move_invariant_reg (struct loop *loop, unsigned invno) - if (REG_P (reg)) - regno = REGNO (reg); - -- reg = gen_reg_rtx_and_attrs (dest); -+ if (!can_move_invariant_reg (loop, inv, reg)) -+ { -+ reg = gen_reg_rtx_and_attrs (dest); - -- /* Try replacing the destination by a new pseudoregister. */ -- validate_change (inv->insn, &SET_DEST (set), reg, true); -+ /* Try replacing the destination by a new pseudoregister. */ -+ validate_change (inv->insn, &SET_DEST (set), reg, true); - -- /* As well as all the dominated uses. */ -- replace_uses (inv, reg, true); -+ /* As well as all the dominated uses. */ -+ replace_uses (inv, reg, true); - -- /* And validate all the changes. */ -- if (!apply_change_group ()) -- goto fail; -+ /* And validate all the changes. */ -+ if (!apply_change_group ()) -+ goto fail; - -- emit_insn_after (gen_move_insn (dest, reg), inv->insn); -+ emit_insn_after (gen_move_insn (dest, reg), inv->insn); -+ } -+ else if (dump_file) -+ fprintf (dump_file, "Invariant %d moved without introducing a new " -+ "temporary register\n", invno); - reorder_insns (inv->insn, inv->insn, BB_END (preheader)); - - /* If there is a REG_EQUAL note on the insn we just moved, and the ---- a/src/gcc/lra-constraints.c -+++ b/src/gcc/lra-constraints.c -@@ -1656,8 +1656,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, - { - HARD_REG_SET temp; - -- // ??? 
Is this assert right -- // lra_assert (hard_reg_set_subset_p (set, reg_class_contents[rclass])); -+ lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set)); - COPY_HARD_REG_SET (temp, set); - AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs); - return (hard_reg_set_subset_p ---- a/src/gcc/objc/Make-lang.in -+++ b/src/gcc/objc/Make-lang.in -@@ -114,6 +114,7 @@ objc.uninstall: - objc.mostlyclean: - -rm -f objc/*$(objext) objc/xforward objc/fflags - -rm -f objc/*$(coverageexts) -+ -rm -f cc1obj$(exeext) - objc.clean: objc.mostlyclean - -rm -rf objc-headers - objc.distclean: ---- a/src/gcc/objcp/Make-lang.in -+++ b/src/gcc/objcp/Make-lang.in -@@ -142,6 +142,7 @@ obj-c++.uninstall: - obj-c++.mostlyclean: - -rm -f objcp/*$(objext) - -rm -f objcp/*$(coverageexts) -+ -rm -f cc1objplus$(exeext) - obj-c++.clean: obj-c++.mostlyclean - obj-c++.distclean: - -rm -f objcp/config.status objcp/Makefile ---- a/src/gcc/optabs.c -+++ b/src/gcc/optabs.c -@@ -6544,18 +6544,28 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1, - { - struct expand_operand ops[2]; - rtx rtx_op0, rtx_op1; -+ machine_mode m0, m1; - enum rtx_code rcode = get_rtx_code (tcode, unsignedp); - - gcc_assert (TREE_CODE_CLASS (tcode) == tcc_comparison); - -- /* Expand operands. */ -+ /* Expand operands. For vector types with scalar modes, e.g. where int64x1_t -+ has mode DImode, this can produce a constant RTX of mode VOIDmode; in such -+ cases, use the original mode. */ - rtx_op0 = expand_expr (t_op0, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op0)), - EXPAND_STACK_PARM); -+ m0 = GET_MODE (rtx_op0); -+ if (m0 == VOIDmode) -+ m0 = TYPE_MODE (TREE_TYPE (t_op0)); ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + - rtx_op1 = expand_expr (t_op1, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op1)), - EXPAND_STACK_PARM); -+ m1 = GET_MODE (rtx_op1); -+ if (m1 == VOIDmode) -+ m1 = TYPE_MODE (TREE_TYPE (t_op1)); - -- create_input_operand (&ops[0], rtx_op0, GET_MODE (rtx_op0)); -- create_input_operand (&ops[1], rtx_op1, GET_MODE (rtx_op1)); -+ create_input_operand (&ops[0], rtx_op0, m0); -+ create_input_operand (&ops[1], rtx_op1, m1); - if (!maybe_legitimize_operands (icode, 4, 2, ops)) - gcc_unreachable (); - return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value); ---- a/src/gcc/params.def -+++ b/src/gcc/params.def -@@ -262,6 +262,14 @@ DEFPARAM(PARAM_MAX_HOIST_DEPTH, - "Maximum depth of search in the dominator tree for expressions to hoist", - 30, 0, 0) - ++#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+/* When synthesizing expnonentiation by a real constant operations using square -+ roots, this controls how deep sqrt chains we are willing to generate. */ -+DEFPARAM(PARAM_MAX_POW_SQRT_DEPTH, -+ "max-pow-sqrt-depth", -+ "Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant", -+ 5, 1, 32) + - /* This parameter limits the number of insns in a loop that will be unrolled, - and by how much the loop is unrolled. 
- ---- a/src/gcc/rtlanal.c -+++ b/src/gcc/rtlanal.c -@@ -104,7 +104,10 @@ generic_subrtx_iterator ::add_single_to_queue (array_type &array, - return base; - } - gcc_checking_assert (i == LOCAL_ELEMS); -- vec_safe_grow (array.heap, i + 1); -+ /* A previous iteration might also have moved from the stack to the -+ heap, in which case the heap array will already be big enough. */ -+ if (vec_safe_length (array.heap) <= i) -+ vec_safe_grow (array.heap, i + 1); - base = array.heap->address (); - memcpy (base, array.stack, sizeof (array.stack)); - base[LOCAL_ELEMS] = x; ---- a/src/gcc/simplify-rtx.c -+++ b/src/gcc/simplify-rtx.c -@@ -1171,7 +1171,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) - = (float_truncate:SF foo:DF). - - (float_truncate:DF (float_extend:XF foo:SF)) -- = (float_extend:SF foo:DF). */ -+ = (float_extend:DF foo:SF). */ - if ((GET_CODE (op) == FLOAT_TRUNCATE - && flag_unsafe_math_optimizations) - || GET_CODE (op) == FLOAT_EXTEND) -@@ -1183,14 +1183,14 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) - XEXP (op, 0), mode); - - /* (float_truncate (float x)) is (float x) */ -- if (GET_CODE (op) == FLOAT -+ if ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) - && (flag_unsafe_math_optimizations - || (SCALAR_FLOAT_MODE_P (GET_MODE (op)) - && ((unsigned)significand_size (GET_MODE (op)) - >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) - - num_sign_bit_copies (XEXP (op, 0), - GET_MODE (XEXP (op, 0)))))))) -- return simplify_gen_unary (FLOAT, mode, -+ return simplify_gen_unary (GET_CODE (op), mode, - XEXP (op, 0), - GET_MODE (XEXP (op, 0))); - -@@ -1221,7 +1221,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) - rounding can't happen. - */ - if (GET_CODE (op) == FLOAT_EXTEND -- || (GET_CODE (op) == FLOAT -+ || ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) - && SCALAR_FLOAT_MODE_P (GET_MODE (op)) - && ((unsigned)significand_size (GET_MODE (op)) - >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) ---- a/src/gcc/stmt.c -+++ b/src/gcc/stmt.c -@@ -342,13 +342,7 @@ parse_output_constraint (const char **constraint_p, int operand_num, - else if (insn_extra_memory_constraint (cn)) - *allows_mem = true; - else -- { -- /* Otherwise we can't assume anything about the nature of -- the constraint except that it isn't purely registers. -- Treat it like "g" and hope for the best. */ -- *allows_reg = true; -- *allows_mem = true; -- } -+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); - break; - } - -@@ -465,13 +459,7 @@ parse_input_constraint (const char **constraint_p, int input_num, - else if (insn_extra_memory_constraint (cn)) - *allows_mem = true; - else -- { -- /* Otherwise we can't assume anything about the nature of -- the constraint except that it isn't purely registers. -- Treat it like "g" and hope for the best. 
*/ -- *allows_reg = true; -- *allows_mem = true; -- } -+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); - break; - } - ---- a/src/gcc/target.def -+++ b/src/gcc/target.def -@@ -1975,7 +1975,7 @@ merging.", - DEFHOOKPOD - (attribute_table, - "If defined, this target hook points to an array of @samp{struct\n\ --attribute_spec} (defined in @file{tree.h}) specifying the machine\n\ -+attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\ - specific attributes for this target and some of the restrictions on the\n\ - entities to which these attributes are applied and the arguments they\n\ - take.", ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr65648.c -@@ -0,0 +1,34 @@ -+/* PR target/65648 */ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+int a = 0, *b = 0, c = 0; -+static int d = 0; -+short e = 1; -+static long long f = 0; -+long long *i = &f; -+unsigned char j = 0; ++ clean_results (); + -+__attribute__((noinline, noclone)) void -+foo (int x, int *y) -+{ -+ asm volatile ("" : : "r" (x), "r" (y) : "memory"); -+} ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, , int, s, 8, 8, -1); ++ VDUP(vector, , int, s, 16, 4, -2); ++ VDUP(vector, , int, s, 32, 2, -3); ++ VDUP(vector, , int, s, 64, 1, -4); ++ VDUP(vector, q, int, s, 8, 16, -1); ++ VDUP(vector, q, int, s, 16, 8, -2); ++ VDUP(vector, q, int, s, 32, 4, -3); ++ VDUP(vector, q, int, s, 64, 2, -4); + -+__attribute__((noinline, noclone)) void -+bar (const char *x, long long y) -+{ -+ asm volatile ("" : : "r" (x), "r" (&y) : "memory"); -+ if (y != 0) -+ __builtin_abort (); -+} ++ /* Choose shift amount arbitrarily. */ ++#define CMT " (negative input)" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_neg, CMT); + -+int -+main () -+{ -+ int k = 0; -+ b = &k; -+ j = (!a) - (c <= e); -+ *i = j; -+ foo (a, &k); -+ bar ("", f); -+ return 0; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/loop-8.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); + -+void -+f (int *a, int *b) -+{ -+ int i; ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); + -+ for (i = 0; i < 100; i++) -+ { -+ int d = 42; ++ /* shift by 1. */ ++#undef CMT ++#define CMT " (shift by 1)" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1, expected_cumulative_sat_sh1, CMT); + -+ a[i] = d; -+ if (i % 2) -+ d = i; -+ b[i] = d; -+ } -+} ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh1, CMT); + -+/* Load of 42 is moved out of the loop, introducing a new pseudo register. */ -+/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */ -+/* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */ -+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ /* shift by 2 to force saturation. */ ++#undef CMT ++#define CMT " (shift by 2)" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_sh2, CMT); + ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/loop-9.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh2, CMT); + -+void -+f (double *a) -+{ -+ int i; -+ for (i = 0; i < 100; i++) -+ a[i] = 18.4242; -+} ++ ++ /* Fill input vector with positive values, to check normal case. 
*/ ++ VDUP(vector, , int, s, 8, 8, 1); ++ VDUP(vector, , int, s, 16, 4, 2); ++ VDUP(vector, , int, s, 32, 2, 3); ++ VDUP(vector, , int, s, 64, 1, 4); ++ VDUP(vector, q, int, s, 8, 16, 5); ++ VDUP(vector, q, int, s, 16, 8, 6); ++ VDUP(vector, q, int, s, 32, 4, 7); ++ VDUP(vector, q, int, s, 64, 2, 8); + -+/* Load of x is moved out of the loop. */ -+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */ -+/* { dg-final { scan-rtl-dump "without introducing a new temporary register" "loop2_invariant" } } */ -+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ /* Arbitrary shift amount. */ ++#undef CMT ++#define CMT "" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8, expected_cumulative_sat, CMT); + ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/loop-invariant.c -@@ -0,0 +1,43 @@ -+/* { dg-do compile { target x86_64-*-* } } */ -+/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */ -+/* NOTE: The target list above could be extended to other targets that have -+ conditional moves, but don't have zero registers. */ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + -+enum test_type ++int main (void) +{ -+ TYPE0, -+ TYPE1 -+}; ++ exec_vqshlu_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_n.c +@@ -0,0 +1,177 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+struct type_node -+{ -+ enum test_type type; -+}; ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + -+struct test_ref -+{ -+ struct type_node *referring; -+}; ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+struct test_node -+{ -+ struct test_node *next; -+}; ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by 3. 
*/ ++int VECT_VAR(expected_cumulative_sat_max_sh3,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,uint,64,2) = 1; + -+int iterate (struct test_node *, unsigned, struct test_ref **); ++/* Expected results with max input value shifted by 3. */ ++VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+int -+loop_invar (struct test_node *node) -+{ -+ struct test_ref *ref; ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by type size. */ ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,uint,64,2) = 0; + -+ for (unsigned i = 0; iterate (node, i, &ref); i++) -+ if (loop_invar ((ref->referring && ref->referring->type == TYPE0) -+ ? ((struct test_node *) (ref->referring)) : 0)) -+ return 1; ++/* Expected results with max input value shifted by type size. */ ++VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+ return 0; -+} ++#define INSN vqshrn_n ++#define TEST_MSG "VQSHRN_N" + -+/* { dg-final { scan-rtl-dump "Decided to move invariant" "loop2_invariant" } } */ -+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-1.c -@@ -0,0 +1,6 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) + -+#define EXPN (-6 * (0.5*0.5*0.5*0.5)) ++FNNAME (INSN) ++{ ++ /* Basic test: y=vqshrn_n(x,v), then store the result. 
*/ ++#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ ++ VECT_VAR(vector_res, T1, W2, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + -+#include "pow-sqrt.x" ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-2.c -@@ -0,0 +1,5 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+#define EXPN (-5.875) -+#include "pow-sqrt.x" ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-3.c -@@ -0,0 +1,5 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=3" } */ ++#define TEST_VQSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+#define EXPN (1.25) -+#include "pow-sqrt.x" ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt.x -@@ -0,0 +1,30 @@ + -+extern void abort (void); ++ /* vector is twice as large as vector_res. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); + ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); + -+__attribute__((noinline)) double -+real_pow (double x, double pow_exp) -+{ -+ return __builtin_pow (x, pow_exp); -+} ++ clean_results (); + -+#define EPS (0.000000000000000000001) ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); + -+#define SYNTH_POW(X, Y) __builtin_pow (X, Y) -+volatile double arg; ++ /* Choose shift amount arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); + -+int -+main (void) -+{ -+ double i_arg = 0.1; ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); + -+ for (arg = i_arg; arg < 100.0; arg += 1.0) -+ { -+ double synth_res = SYNTH_POW (arg, EXPN); -+ double real_res = real_pow (arg, EXPN); + -+ if (__builtin_abs (SYNTH_POW (arg, EXPN) - real_pow (arg, EXPN)) > EPS) -+ abort (); -+ } -+ return 0; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/torture/pr66076.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "" } */ -+/* { dg-options "-mno-prefer-avx128 -march=bdver4" { target i?86-*-* x86_64-*-* } } */ ++ /* Use max possible value as input. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + -+void -+f0a (char *result, char *arg1, char *arg4, char temp_6) -+{ -+ int idx = 0; -+ for (idx = 0; idx < 416; idx += 1) -+ result[idx] = (arg1[idx] + arg4[idx]) * temp_6; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c -@@ -0,0 +1,54 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ ++#undef CMT ++#define CMT " (check saturation: shift by 3)" ++ TEST_VQSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); + -+void foo (double *p) -+{ -+ int i; -+ for (i = -20000; i < 200000; i+= 40) -+ { -+ p[i+0] = 1.0; -+ p[i+1] = 1.0; -+ p[i+2] = 1.0; -+ p[i+3] = 1.0; -+ p[i+4] = 1.0; -+ p[i+5] = 1.0; -+ p[i+6] = 1.0; -+ p[i+7] = 1.0; -+ p[i+8] = 1.0; -+ p[i+9] = 1.0; -+ p[i+10] = 1.0; -+ p[i+11] = 1.0; -+ p[i+12] = 1.0; -+ p[i+13] = 1.0; -+ p[i+14] = 1.0; -+ p[i+15] = 1.0; -+ p[i+16] = 1.0; -+ p[i+17] = 1.0; -+ p[i+18] = 1.0; -+ p[i+19] = 1.0; -+ p[i+20] = 1.0; -+ p[i+21] = 1.0; -+ p[i+22] = 1.0; -+ p[i+23] = 1.0; -+ p[i+24] = 1.0; -+ p[i+25] = 1.0; -+ p[i+26] = 1.0; -+ p[i+27] = 1.0; -+ p[i+28] = 1.0; -+ p[i+29] = 1.0; -+ p[i+30] = 1.0; -+ p[i+31] = 1.0; -+ p[i+32] = 1.0; -+ p[i+33] = 1.0; -+ p[i+34] = 1.0; -+ p[i+35] = 1.0; -+ p[i+36] = 1.0; -+ p[i+37] = 1.0; -+ p[i+38] = 1.0; -+ p[i+39] = 1.0; -+ } -+} ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, 
expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT); + -+/* We should groups address type IV uses. */ -+/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */ -+/* { dg-final { cleanup-tree-dump "ivopts" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c -@@ -12,6 +12,8 @@ - - vf2_t vf2 = (vf2_t){ 17.f, 18.f }; - vi4_t vi4 = (vi4_t){ 0xdeadbabe, 0xbabecafe, 0xcafebeef, 0xbeefdead }; -+vlf1_t vlf1 = (vlf1_t) { 17.0 }; + - union int128_t qword; - - int *int_ptr = (int *)0xabcdef0123456789ULL; -@@ -41,4 +43,5 @@ FUNC_VAL_CHECK (11, long double, 98765432123456789.987654321L, Q0, flat) - FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64) - FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128) - FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat) -+FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat) - #endif ---- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h -+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h -@@ -10,6 +10,9 @@ typedef float vf4_t __attribute__((vector_size (16))); - /* 128-bit vector of 4 ints. */ - typedef int vi4_t __attribute__((vector_size (16))); - -+/* 128-bit vector of 1 quad precision float. */ -+typedef long double vlf1_t __attribute__((vector_size (16))); ++#undef CMT ++#define CMT " (check saturation: shift by max)" ++ TEST_VQSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); + - /* signed quad-word (in an union for the convenience of initialization). */ - union int128_t - { ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp -@@ -27,14 +27,26 @@ load_lib gcc-dg.exp - - # Initialize `dg'. - load_lib c-torture.exp --load_lib target-supports.exp --load_lib torture-options.exp - - dg-init - --if {[istarget arm*-*-*] -- && ![check_effective_target_arm_neon_ok]} then { -- return -+# The default action for a test is 'run'. Save current default. -+global dg-do-what-default -+set save-dg-do-what-default ${dg-do-what-default} ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); ++} + -+# For ARM, make sure that we have a target compatible with NEON, and do -+# not attempt to run execution tests if the hardware doesn't support it. 
-+if {[istarget arm*-*-*]} then { -+ if {![check_effective_target_arm_neon_ok]} then { -+ return -+ } -+ if {![is-effective-target arm_neon_hw]} then { -+ set dg-do-what-default compile -+ } else { -+ set dg-do-what-default run -+ } -+} else { -+ set dg-do-what-default run - } - - torture-init -@@ -44,22 +56,10 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS - set additional_flags [add_options_for_arm_neon ""] - - # Main loop. --foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] { -- # If we're only testing specific files and this isn't one of them, skip it. -- if ![runtest_file_p $runtests $src] then { -- continue -- } -- -- # runtest_file_p is already run above, and the code below can run -- # runtest_file_p again, make sure everything for this test is -- # performed if the above runtest_file_p decided this runtest -- # instance should execute the test -- gcc_parallel_test_enable 0 -- c-torture-execute $src $additional_flags -- gcc-dg-runtest $src "" $additional_flags -- gcc_parallel_test_enable 1 --} -+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ -+ "" ${additional_flags} - - # All done. -+set dg-do-what-default ${save-dg-do-what-default} - torture-finish - dg-finish ++int main (void) ++{ ++ exec_vqshrn_n (); ++ return 0; ++} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovn.c -@@ -0,0 +1,134 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_n.c +@@ -0,0 +1,133 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + ++/* Expected values of cumulative_saturation flag with negative input. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; ++ ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by 1. */ ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++ ++/* Expected results with max input value shifted by 1. */ ++VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++ +/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12, -+ 0x12, 0x12, 0x12, 0x12 }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0x12345678, 0x12345678 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82, -+ 0x82, 0x82, 0x82, 0x82 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 }; -+ -+/* Expected values of cumulative_saturation flag when saturation occurs. */ -+int VECT_VAR(expected_cumulative_sat1,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat1,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat1,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat1,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat1,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat1,uint,32,2) = 1; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48, ++ 0x48, 0x48, 0x48, 0x48 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe }; + -+/* Expected results when saturation occurs. */ -+VECT_VAR_DECL(expected1,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected1,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+#define INSN_NAME vqmovn -+#define TEST_MSG "VQMOVN" ++#define INSN vqshrun_n ++#define TEST_MSG "VQSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + -+FNNAME (INSN_NAME) ++FNNAME (INSN) +{ -+ /* Basic test: y=OP(x), then store the result. */ -+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ -+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ ++ /* Basic test: y=vqshrun_n(x,v), then store the result. */ ++#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ ++ VECT_VAR(vector_res, uint, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ ++ VECT_VAR(vector_res, uint, W2, N)); \ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + -+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+ /* No need for 64 bits variants. */ ++#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ /* vector is twice as large as vector_res. 
*/ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); -+ DECL_VARIABLE(vector, uint, 16, 8); -+ DECL_VARIABLE(vector, uint, 32, 4); -+ DECL_VARIABLE(vector, uint, 64, 2); + -+ DECL_VARIABLE(vector_res, int, 8, 8); -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + -+ /* Fill input vector with arbitrary values. */ -+ VDUP(vector, q, int, s, 16, 8, 0x12); -+ VDUP(vector, q, int, s, 32, 4, 0x1278); -+ VDUP(vector, q, int, s, 64, 2, 0x12345678); -+ VDUP(vector, q, uint, u, 16, 8, 0x82); -+ VDUP(vector, q, uint, u, 32, 4, 0x8765); -+ VDUP(vector, q, uint, u, 64, 2, 0x87654321); ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, -2); ++ VDUP(vector, q, int, s, 32, 4, -3); ++ VDUP(vector, q, int, s, 64, 2, -4); + -+ /* Apply a unary operator named INSN_NAME. */ ++ /* Choose shift amount arbitrarily. */ ++#define CMT " (negative input)" ++ TEST_VQSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ ++#undef CMT ++#define CMT " (check cumulative saturation)" ++ TEST_VQSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT); ++ TEST_VQSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT); ++ TEST_VQSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++ ++ ++ /* Fill input vector with positive values, to check normal case. 
*/ ++ VDUP(vector, q, int, s, 16, 8, 0x1234); ++ VDUP(vector, q, int, s, 32, 4, 0x87654321); ++ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); ++ ++#undef CMT +#define CMT "" -+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT); ++ TEST_VQSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT); ++ TEST_VQSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); + CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); + CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqshrun_n (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c +@@ -25,10 +25,6 @@ VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff8a, 0xff8b, + 0xff8c, 0xff8d }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff79, 0xffffff7a }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xdf, 0xe0, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 0xea, +@@ -49,14 +45,6 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffff79, 0xffffff7a, + 0xffffff7b, 0xffffff7c }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffff68, + 0xffffffffffffff69 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected values of cumulative saturation flag. */ + int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c +@@ -0,0 +1,154 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++#include + ++/* Expected results with positive input. */ ++VECT_VAR_DECL(expected_positive,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_positive,uint,32,4) [] = { 0xbf000000, 0xbf000000, ++ 0xbf000000, 0xbf000000 }; ++VECT_VAR_DECL(expected_positive,hfloat,32,2) [] = { 0x3f068000, 0x3f068000 }; ++VECT_VAR_DECL(expected_positive,hfloat,32,4) [] = { 0x3c030000, 0x3c030000, ++ 0x3c030000, 0x3c030000 }; + -+ /* Fill input vector with arbitrary values which cause cumulative -+ saturation. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x1234); -+ VDUP(vector, q, int, s, 32, 4, 0x12345678); -+ VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL); -+ VDUP(vector, q, uint, u, 16, 8, 0x8234); -+ VDUP(vector, q, uint, u, 32, 4, 0x87654321); -+ VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_negative,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_negative,uint,32,4) [] = { 0xee800000, 0xee800000, ++ 0xee800000, 0xee800000 }; ++VECT_VAR_DECL(expected_negative,hfloat,32,2) [] = { 0xbdcc8000, 0xbdcc8000 }; ++VECT_VAR_DECL(expected_negative,hfloat,32,4) [] = { 0xbc030000, 0xbc030000, ++ 0xbc030000, 0xbc030000 }; ++ ++/* Expected results with FP special values (NaN, infinity). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results with FP special values (zero, large value). */ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7f800000, 0x7f800000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results with FP special values (-0, -infinity). */ ++VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++ ++/* Expected results with FP special large negative value. */ ++VECT_VAR_DECL(expected_fp4,hfloat,32,2) [] = { 0x80000000, 0x80000000 }; ++ ++#define TEST_MSG "VRECPE/VRECPEQ" ++void exec_vrecpe(void) ++{ ++ int i; ++ ++ /* Basic test: y=vrecpe(x), then store the result. */ ++#define TEST_VRECPE(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrecpe##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) ++ ++ /* No need for 64 bits variants. */ ++ DECL_VARIABLE(vector, uint, 32, 2); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 4); ++ ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 32, 4); ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 4); ++ ++ clean_results (); ++ ++ /* Choose init value arbitrarily, positive. */ ++ VDUP(vector, , uint, u, 32, 2, 0x12345678); ++ VDUP(vector, , float, f, 32, 2, 1.9f); ++ VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); ++ VDUP(vector, q, float, f, 32, 4, 125.0f); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, uint, u, 32, 2); ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, uint, u, 32, 4); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#define CMT " (positive input)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_positive, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_positive, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_positive, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_positive, CMT); ++ ++ /* Choose init value arbitrarily,negative. */ ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , float, f, 32, 2, -10.0f); ++ VDUP(vector, q, uint, u, 32, 4, 0x89081234); ++ VDUP(vector, q, float, f, 32, 4, -125.0f); ++ ++ /* Apply the operator. 
*/ ++ TEST_VRECPE(, uint, u, 32, 2); ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, uint, u, 32, 4); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " (negative input)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_negative, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_negative, CMT); ++ ++ /* Test FP variants with special input values (NaN, infinity). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (NaN, infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ /* Test FP variants with special input values (zero, large value). */ ++ VDUP(vector, , float, f, 32, 2, 0.0f); ++ VDUP(vector, q, float, f, 32, 4, 8.97229e37f /*9.0e37f*/); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (zero, large value)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); ++ ++ /* Test FP variants with special input values (-0, -infinity). */ ++ VDUP(vector, , float, f, 32, 2, -0.0f); ++ VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, float, f, 32, 4); + -+ /* Apply a unary operator named INSN_NAME. */ +#undef CMT -+#define CMT " (with saturation)" -+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat1, CMT); ++#define CMT " FP special (-0, -infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected1, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected1, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected1, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected1, CMT); ++ /* Test FP variants with special input values (large negative value). */ ++ VDUP(vector, , float, f, 32, 2, -9.0e37f); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ ++#undef CMT ++#define CMT " FP special (large negative value)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp4, CMT); +} + +int main (void) +{ -+ exec_vqmovn (); ++ exec_vrecpe (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovun.c -@@ -0,0 +1,93 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c +@@ -0,0 +1,117 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" ++#include + -+/* Expected values of cumulative_saturation flag. 
*/ -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++/* Expected results with positive input. */ ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2e19eb7, 0xc2e19eb7 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1db851f, 0xc1db851f, ++ 0xc1db851f, 0xc1db851f }; ++ ++/* Expected results with FP special values (NaN). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, ++ 0x7fc00000, 0x7fc00000 }; ++ ++/* Expected results with FP special values (infinity, 0) and normal ++ values. */ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; + -+/* Expected results. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34, -+ 0x34, 0x34, 0x34, 0x34 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 }; ++/* Expected results with FP special values (infinity, 0). */ ++VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; + -+/* Expected values of cumulative_saturation flag with negative input. */ -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 1; ++#define TEST_MSG "VRECPS/VRECPSQ" ++void exec_vrecps(void) ++{ ++ int i; + -+/* Expected results with negative input. */ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ /* Basic test: y=vrecps(x), then store the result. */ ++#define TEST_VRECPS(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrecps##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) ++ ++ /* No need for integer variants. */ ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 4); + -+#define INSN_NAME vqmovun -+#define TEST_MSG "VQMOVUN" ++ DECL_VARIABLE(vector2, float, 32, 2); ++ DECL_VARIABLE(vector2, float, 32, 4); + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 4); + -+FNNAME (INSN_NAME) -+{ -+ /* Basic test: y=OP(x), then store the result. */ -+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ -+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ clean_results (); + -+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Choose init value arbitrarily. 
*/ ++ VDUP(vector, , float, f, 32, 2, 12.9f); ++ VDUP(vector, q, float, f, 32, 4, 9.2f); + -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); ++ VDUP(vector2, , float, f, 32, 2, 8.9f); ++ VDUP(vector2, q, float, f, 32, 4, 3.2f); + -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++ /* Apply the operator. */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); + -+ clean_results (); ++#define CMT " (positive input)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); + -+ /* Fill input vector with arbitrary values. */ -+ VDUP(vector, q, int, s, 16, 8, 0x34); -+ VDUP(vector, q, int, s, 32, 4, 0x5678); -+ VDUP(vector, q, int, s, 64, 2, 0x12345678); + -+ /* Apply a unary operator named INSN_NAME. */ -+#define CMT "" -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ /* Test FP variants with special input values (NaN). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector2, q, float, f, 32, 4, NAN); + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ /* Apply the operator. */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); + -+ /* Fill input vector with negative values. */ -+ VDUP(vector, q, int, s, 16, 8, 0x8234); -+ VDUP(vector, q, int, s, 32, 4, 0x87654321); -+ VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); ++#undef CMT ++#define CMT " FP special (NaN)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ ++ /* Test FP variants with special input values (infinity, 0). */ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Restore a normal value. */ ++ ++ /* Apply the operator. */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); + -+ /* Apply a unary operator named INSN_NAME. */ +#undef CMT -+#define CMT " (negative input)" -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat_neg, CMT); ++#define CMT " FP special (infinity, 0) and normal value" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ ++ /* Test FP variants with only special input values (infinity, 0). 
*/ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ VDUP(vector2, , float, f, 32, 2, 0.0f); ++ VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (infinity, 0)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); +} + +int main (void) +{ -+ exec_vqmovun (); ++ exec_vrecps (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh.c -@@ -0,0 +1,161 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret.c +@@ -0,0 +1,741 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+ -+/* Expected results. */ -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+/* Expected values of cumulative_saturation flag when multiplication -+ saturates. */ -+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; -+ -+/* Expected results when multiplication saturates. */ -+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+ -+/* Expected values of cumulative_saturation flag when rounding -+ should not cause saturation. */ -+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; -+ -+/* Expected results when rounding should not cause saturation. */ -+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++/* Expected results for vreinterpret_s8_xx. 
*/ ++VECT_VAR_DECL(expected_s8_1,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_s8_2,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_3,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_4,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_s8_5,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_s8_6,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_7,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_8,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_s8_9,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++ ++/* Expected results for vreinterpret_s16_xx. */ ++VECT_VAR_DECL(expected_s16_1,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_s16_2,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_s16_3,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_s16_4,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_s16_5,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_s16_6,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_s16_7,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_s16_8,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_s16_9,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++ ++/* Expected results for vreinterpret_s32_xx. */ ++VECT_VAR_DECL(expected_s32_1,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_s32_2,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_s32_3,int,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_s32_4,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_s32_5,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_s32_6,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_s32_7,int,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_s32_8,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_s32_9,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++ ++/* Expected results for vreinterpret_s64_xx. */ ++VECT_VAR_DECL(expected_s64_1,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_s64_2,int,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_s64_3,int,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_s64_4,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_s64_5,int,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_s64_6,int,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_s64_7,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected_s64_8,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_s64_9,int,64,1) [] = { 0xfff3fff2fff1fff0 }; ++ ++/* Expected results for vreinterpret_u8_xx. 
*/ ++VECT_VAR_DECL(expected_u8_1,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_u8_2,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_u8_3,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_4,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_5,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_u8_6,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_7,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_8,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_u8_9,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++ ++/* Expected results for vreinterpret_u16_xx. */ ++VECT_VAR_DECL(expected_u16_1,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_u16_2,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_u16_3,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_u16_4,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_u16_5,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_u16_6,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_u16_7,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_u16_8,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_u16_9,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++ ++/* Expected results for vreinterpret_u32_xx. */ ++VECT_VAR_DECL(expected_u32_1,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_u32_2,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_u32_3,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_u32_4,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_u32_5,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_u32_6,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_u32_7,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_u32_8,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_u32_9,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++ ++/* Expected results for vreinterpret_u64_xx. */ ++VECT_VAR_DECL(expected_u64_1,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_u64_2,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_u64_3,uint,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_u64_4,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected_u64_5,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_u64_6,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_u64_7,uint,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_u64_8,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_u64_9,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; ++ ++/* Expected results for vreinterpret_p8_xx. 
*/ ++VECT_VAR_DECL(expected_p8_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_p8_2,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_p8_3,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_4,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_5,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_p8_6,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_p8_7,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_8,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_9,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; + -+#define INSN vqrdmulh -+#define TEST_MSG "VQRDMULH" ++/* Expected results for vreinterpret_p16_xx. */ ++VECT_VAR_DECL(expected_p16_1,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_p16_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_p16_3,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_p16_4,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_p16_5,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_p16_6,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_p16_7,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_p16_8,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_p16_9,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++ ++/* Expected results for vreinterpretq_s8_xx. */ ++VECT_VAR_DECL(expected_q_s8_1,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_s8_2,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_3,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_4,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_s8_5,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_s8_6,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_7,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_8,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_s8_9,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++ ++/* Expected results for vreinterpretq_s16_xx. 
*/ ++VECT_VAR_DECL(expected_q_s16_1,int,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_s16_2,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_3,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_4,int,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_s16_5,int,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected_q_s16_6,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_7,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_8,int,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_s16_9,int,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++ ++/* Expected results for vreinterpretq_s32_xx. */ ++VECT_VAR_DECL(expected_q_s32_1,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_s32_2,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_s32_3,int,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_s32_4,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_s32_5,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_s32_6,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_s32_7,int,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_s32_8,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_s32_9,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++ ++/* Expected results for vreinterpretq_s64_xx. */ ++VECT_VAR_DECL(expected_q_s64_1,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_s64_2,int,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_s64_3,int,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_s64_4,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_s64_5,int,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_s64_6,int,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_s64_7,int,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected_q_s64_8,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_s64_9,int,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++ ++/* Expected results for vreinterpretq_u8_xx. 
*/ ++VECT_VAR_DECL(expected_q_u8_1,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_u8_2,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_u8_3,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_4,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_5,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_u8_6,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_7,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_8,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_u8_9,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++ ++/* Expected results for vreinterpretq_u16_xx. */ ++VECT_VAR_DECL(expected_q_u16_1,uint,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_u16_2,uint,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected_q_u16_3,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_4,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_5,uint,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_u16_6,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_7,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_8,uint,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_u16_9,uint,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++ ++/* Expected results for vreinterpretq_u32_xx. 
*/ ++VECT_VAR_DECL(expected_q_u32_1,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_u32_2,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_u32_3,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_u32_4,uint,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_u32_5,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_u32_6,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_u32_7,uint,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_u32_8,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_u32_9,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++ ++/* Expected results for vreinterpretq_u64_xx. */ ++VECT_VAR_DECL(expected_q_u64_1,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_u64_2,uint,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_u64_3,uint,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_u64_4,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected_q_u64_5,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_u64_6,uint,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_u64_7,uint,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_u64_8,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_u64_9,uint,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++ ++/* Expected results for vreinterpret_f32_xx. */ ++VECT_VAR_DECL(expected_f32_1,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_f32_2,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_f32_3,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_f32_4,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_f32_5,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_f32_6,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_f32_7,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_f32_8,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_f32_9,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_f32_10,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++ ++/* Expected results for vreinterpretq_f32_xx. 
*/ ++VECT_VAR_DECL(expected_q_f32_1,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_f32_2,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_f32_3,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_f32_4,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_f32_5,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_f32_6,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_f32_7,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_f32_8,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_f32_9,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_f32_10,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++ ++/* Expected results for vreinterpretq_xx_f32. */ ++VECT_VAR_DECL(expected_xx_f32_1,int,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1 }; ++VECT_VAR_DECL(expected_xx_f32_2,int,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; ++VECT_VAR_DECL(expected_xx_f32_3,int,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_xx_f32_4,int,64,1) [] = { 0xc1700000c1800000 }; ++VECT_VAR_DECL(expected_xx_f32_5,uint,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1 }; ++VECT_VAR_DECL(expected_xx_f32_6,uint,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; ++VECT_VAR_DECL(expected_xx_f32_7,uint,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_xx_f32_8,uint,64,1) [] = { 0xc1700000c1800000 }; ++VECT_VAR_DECL(expected_xx_f32_9,poly,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1 }; ++VECT_VAR_DECL(expected_xx_f32_10,poly,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; ++ ++/* Expected results for vreinterpretq_xx_f32. */ ++VECT_VAR_DECL(expected_q_xx_f32_1,int,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1, ++ 0x0, 0x0, 0x60, 0xc1, ++ 0x0, 0x0, 0x50, 0xc1 }; ++VECT_VAR_DECL(expected_q_xx_f32_2,int,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, ++ 0x0, 0xc160, 0x0, 0xc150 }; ++VECT_VAR_DECL(expected_q_xx_f32_3,int,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0xc1500000 }; ++VECT_VAR_DECL(expected_q_xx_f32_4,int,64,2) [] = { 0xc1700000c1800000, ++ 0xc1500000c1600000 }; ++VECT_VAR_DECL(expected_q_xx_f32_5,uint,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1, ++ 0x0, 0x0, 0x60, 0xc1, ++ 0x0, 0x0, 0x50, 0xc1 }; ++VECT_VAR_DECL(expected_q_xx_f32_6,uint,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, ++ 0x0, 0xc160, 0x0, 0xc150 }; ++VECT_VAR_DECL(expected_q_xx_f32_7,uint,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0xc1500000 }; ++VECT_VAR_DECL(expected_q_xx_f32_8,uint,64,2) [] = { 0xc1700000c1800000, ++ 0xc1500000c1600000 }; ++VECT_VAR_DECL(expected_q_xx_f32_9,poly,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1, ++ 0x0, 0x0, 0x60, 0xc1, ++ 0x0, 0x0, 0x50, 0xc1 }; ++VECT_VAR_DECL(expected_q_xx_f32_10,poly,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, ++ 0x0, 0xc160, 0x0, 0xc150 }; + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++#define TEST_MSG "VREINTERPRET/VREINTERPRETQ" + -+FNNAME (INSN) ++void exec_vreinterpret (void) +{ -+ /* vector_res = vqrdmulh(vector,vector2), then store the result. 
*/ -+#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ int i; ++ ++ /* Basic test: y=vreinterpret(x), then store the result. */ ++#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ + VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N)); \ ++ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) -+ -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) -+ -+#define TEST_VQRDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) -+ ++ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + -+ DECL_VARIABLE(vector, int, 16, 4); -+ DECL_VARIABLE(vector, int, 32, 2); -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); ++#define TEST_VREINTERPRET_POLY(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, int, 16, 8); -+ DECL_VARIABLE(vector_res, int, 32, 4); ++#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + -+ DECL_VARIABLE(vector2, int, 16, 4); -+ DECL_VARIABLE(vector2, int, 32, 2); -+ DECL_VARIABLE(vector2, int, 16, 8); -+ DECL_VARIABLE(vector2, int, 32, 4); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + -+ VLOAD(vector, buffer, , int, s, 16, 4); -+ VLOAD(vector, buffer, , int, s, 32, 2); -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); -+ -+ /* Initialize vector2. */ -+ VDUP(vector2, , int, s, 16, 4, 0x5555); -+ VDUP(vector2, , int, s, 32, 2, 0xBB); -+ VDUP(vector2, q, int, s, 16, 8, 0x33); -+ VDUP(vector2, q, int, s, 32, 4, 0x22); -+ -+#define CMT "" -+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat, CMT); -+ -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); -+ -+ /* Now use input values such that the multiplication causes -+ saturation. 
*/ -+#define TEST_MSG_MUL " (check mul cumulative saturation)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8000); -+ VDUP(vector2, , int, s, 32, 2, 0x80000000); -+ VDUP(vector2, q, int, s, 16, 8, 0x8000); -+ VDUP(vector2, q, int, s, 32, 4, 0x80000000); + -+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); -+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL); -+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL); -+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ /* vreinterpret_s8_xx. */ ++ TEST_VREINTERPRET(, int, s, 8, 8, int, s, 16, 4, expected_s8_1); ++ TEST_VREINTERPRET(, int, s, 8, 8, int, s, 32, 2, expected_s8_2); ++ TEST_VREINTERPRET(, int, s, 8, 8, int, s, 64, 1, expected_s8_3); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 8, 8, expected_s8_4); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 16, 4, expected_s8_5); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 32, 2, expected_s8_6); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 64, 1, expected_s8_7); ++ TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 8, 8, expected_s8_8); ++ TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 16, 4, expected_s8_9); ++ ++ /* vreinterpret_s16_xx. */ ++ TEST_VREINTERPRET(, int, s, 16, 4, int, s, 8, 8, expected_s16_1); ++ TEST_VREINTERPRET(, int, s, 16, 4, int, s, 32, 2, expected_s16_2); ++ TEST_VREINTERPRET(, int, s, 16, 4, int, s, 64, 1, expected_s16_3); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 8, 8, expected_s16_4); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 16, 4, expected_s16_5); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 32, 2, expected_s16_6); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 64, 1, expected_s16_7); ++ TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 8, 8, expected_s16_8); ++ TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 16, 4, expected_s16_9); ++ ++ /* vreinterpret_s32_xx. */ ++ TEST_VREINTERPRET(, int, s, 32, 2, int, s, 8, 8, expected_s32_1); ++ TEST_VREINTERPRET(, int, s, 32, 2, int, s, 16, 4, expected_s32_2); ++ TEST_VREINTERPRET(, int, s, 32, 2, int, s, 64, 1, expected_s32_3); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 8, 8, expected_s32_4); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 16, 4, expected_s32_5); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 32, 2, expected_s32_6); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 64, 1, expected_s32_7); ++ TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 8, 8, expected_s32_8); ++ TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 16, 4, expected_s32_9); ++ ++ /* vreinterpret_s64_xx. 
*/ ++ TEST_VREINTERPRET(, int, s, 64, 1, int, s, 8, 8, expected_s64_1); ++ TEST_VREINTERPRET(, int, s, 64, 1, int, s, 16, 4, expected_s64_2); ++ TEST_VREINTERPRET(, int, s, 64, 1, int, s, 32, 2, expected_s64_3); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 8, 8, expected_s64_4); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 16, 4, expected_s64_5); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 32, 2, expected_s64_6); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 64, 1, expected_s64_7); ++ TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 8, 8, expected_s64_8); ++ TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 16, 4, expected_s64_9); ++ ++ /* vreinterpret_u8_xx. */ ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 8, 8, expected_u8_1); ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 16, 4, expected_u8_2); ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 32, 2, expected_u8_3); ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 64, 1, expected_u8_4); ++ TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 16, 4, expected_u8_5); ++ TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 32, 2, expected_u8_6); ++ TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 64, 1, expected_u8_7); ++ TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 8, 8, expected_u8_8); ++ TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 16, 4, expected_u8_9); ++ ++ /* vreinterpret_u16_xx. */ ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 8, 8, expected_u16_1); ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 16, 4, expected_u16_2); ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 32, 2, expected_u16_3); ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 64, 1, expected_u16_4); ++ TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 8, 8, expected_u16_5); ++ TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 32, 2, expected_u16_6); ++ TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 64, 1, expected_u16_7); ++ TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 8, 8, expected_u16_8); ++ TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 16, 4, expected_u16_9); ++ ++ /* vreinterpret_u32_xx. */ ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 8, 8, expected_u32_1); ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 16, 4, expected_u32_2); ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 32, 2, expected_u32_3); ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 64, 1, expected_u32_4); ++ TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 8, 8, expected_u32_5); ++ TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 16, 4, expected_u32_6); ++ TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 64, 1, expected_u32_7); ++ TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 8, 8, expected_u32_8); ++ TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 16, 4, expected_u32_9); ++ ++ /* vreinterpret_u64_xx. */ ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 8, 8, expected_u64_1); ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 16, 4, expected_u64_2); ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 32, 2, expected_u64_3); ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 64, 1, expected_u64_4); ++ TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 8, 8, expected_u64_5); ++ TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 16, 4, expected_u64_6); ++ TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 32, 2, expected_u64_7); ++ TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 8, 8, expected_u64_8); ++ TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 16, 4, expected_u64_9); ++ ++ /* vreinterpret_p8_xx. 
*/ ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 8, 8, expected_p8_1); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 16, 4, expected_p8_2); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 32, 2, expected_p8_3); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 64, 1, expected_p8_4); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 8, 8, expected_p8_5); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 16, 4, expected_p8_6); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 32, 2, expected_p8_7); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 64, 1, expected_p8_8); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, poly, p, 16, 4, expected_p8_9); ++ ++ /* vreinterpret_p16_xx. */ ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 8, 8, expected_p16_1); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 16, 4, expected_p16_2); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 32, 2, expected_p16_3); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 64, 1, expected_p16_4); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 8, 8, expected_p16_5); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 16, 4, expected_p16_6); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 32, 2, expected_p16_7); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 64, 1, expected_p16_8); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, poly, p, 8, 8, expected_p16_9); ++ ++ /* vreinterpretq_s8_xx. */ ++ TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 16, 8, expected_q_s8_1); ++ TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 32, 4, expected_q_s8_2); ++ TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 64, 2, expected_q_s8_3); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 8, 16, expected_q_s8_4); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 16, 8, expected_q_s8_5); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 32, 4, expected_q_s8_6); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 64, 2, expected_q_s8_7); ++ TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 8, 16, expected_q_s8_8); ++ TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 16, 8, expected_q_s8_9); ++ ++ /* vreinterpretq_s16_xx. */ ++ TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 8, 16, expected_q_s16_1); ++ TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 32, 4, expected_q_s16_2); ++ TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 64, 2, expected_q_s16_3); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 8, 16, expected_q_s16_4); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 16, 8, expected_q_s16_5); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 32, 4, expected_q_s16_6); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 64, 2, expected_q_s16_7); ++ TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 8, 16, expected_q_s16_8); ++ TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 16, 8, expected_q_s16_9); ++ ++ /* vreinterpretq_s32_xx. */ ++ TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 8, 16, expected_q_s32_1); ++ TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 16, 8, expected_q_s32_2); ++ TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 64, 2, expected_q_s32_3); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 8, 16, expected_q_s32_4); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 16, 8, expected_q_s32_5); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 32, 4, expected_q_s32_6); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 64, 2, expected_q_s32_7); ++ TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 8, 16, expected_q_s32_8); ++ TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 16, 8, expected_q_s32_9); ++ ++ /* vreinterpretq_s64_xx. 
*/ ++ TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 8, 16, expected_q_s64_1); ++ TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 16, 8, expected_q_s64_2); ++ TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 32, 4, expected_q_s64_3); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 8, 16, expected_q_s64_4); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 16, 8, expected_q_s64_5); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 32, 4, expected_q_s64_6); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 64, 2, expected_q_s64_7); ++ TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 8, 16, expected_q_s64_8); ++ TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 16, 8, expected_q_s64_9); ++ ++ /* vreinterpretq_u8_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 8, 16, expected_q_u8_1); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 16, 8, expected_q_u8_2); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 32, 4, expected_q_u8_3); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 64, 2, expected_q_u8_4); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 16, 8, expected_q_u8_5); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 32, 4, expected_q_u8_6); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 64, 2, expected_q_u8_7); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 8, 16, expected_q_u8_8); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 16, 8, expected_q_u8_9); ++ ++ /* vreinterpretq_u16_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 8, 16, expected_q_u16_1); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 16, 8, expected_q_u16_2); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 32, 4, expected_q_u16_3); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 64, 2, expected_q_u16_4); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 8, 16, expected_q_u16_5); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 32, 4, expected_q_u16_6); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 64, 2, expected_q_u16_7); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 8, 16, expected_q_u16_8); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 16, 8, expected_q_u16_9); ++ ++ /* vreinterpretq_u32_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 8, 16, expected_q_u32_1); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 16, 8, expected_q_u32_2); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 32, 4, expected_q_u32_3); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 64, 2, expected_q_u32_4); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 8, 16, expected_q_u32_5); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 16, 8, expected_q_u32_6); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 64, 2, expected_q_u32_7); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 8, 16, expected_q_u32_8); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 16, 8, expected_q_u32_9); ++ ++ /* vreinterpretq_u64_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 8, 16, expected_q_u64_1); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 16, 8, expected_q_u64_2); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 32, 4, expected_q_u64_3); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 64, 2, expected_q_u64_4); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 8, 16, expected_q_u64_5); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 16, 8, expected_q_u64_6); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 32, 4, expected_q_u64_7); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 8, 16, expected_q_u64_8); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 16, 8, expected_q_u64_9); ++ ++ /* vreinterpret_f32_xx. 
*/ ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 8, 8, expected_f32_1); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 16, 4, expected_f32_2); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 32, 2, expected_f32_3); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 64, 1, expected_f32_4); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 8, 8, expected_f32_5); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 16, 4, expected_f32_6); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 32, 2, expected_f32_7); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 64, 1, expected_f32_8); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 8, 8, expected_f32_9); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 16, 4, expected_f32_10); ++ ++ /* vreinterpretq_f32_xx. */ ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 8, 16, expected_q_f32_1); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 16, 8, expected_q_f32_2); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 32, 4, expected_q_f32_3); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 64, 2, expected_q_f32_4); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 8, 16, expected_q_f32_5); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 16, 8, expected_q_f32_6); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 32, 4, expected_q_f32_7); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 64, 2, expected_q_f32_8); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 8, 16, expected_q_f32_9); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 16, 8, expected_q_f32_10); ++ ++ /* vreinterpret_xx_f32. */ ++ TEST_VREINTERPRET(, int, s, 8, 8, float, f, 32, 2, expected_xx_f32_1); ++ TEST_VREINTERPRET(, int, s, 16, 4, float, f, 32, 2, expected_xx_f32_2); ++ TEST_VREINTERPRET(, int, s, 32, 2, float, f, 32, 2, expected_xx_f32_3); ++ TEST_VREINTERPRET(, int, s, 64, 1, float, f, 32, 2, expected_xx_f32_4); ++ TEST_VREINTERPRET(, uint, u, 8, 8, float, f, 32, 2, expected_xx_f32_5); ++ TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 32, 2, expected_xx_f32_6); ++ TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 32, 2, expected_xx_f32_7); ++ TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 32, 2, expected_xx_f32_8); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 32, 2, expected_xx_f32_9); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 32, 2, expected_xx_f32_10); ++ ++ /* vreinterpretq_xx_f32. */ ++ TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 32, 4, expected_q_xx_f32_1); ++ TEST_VREINTERPRET(q, int, s, 16, 8, float, f, 32, 4, expected_q_xx_f32_2); ++ TEST_VREINTERPRET(q, int, s, 32, 4, float, f, 32, 4, expected_q_xx_f32_3); ++ TEST_VREINTERPRET(q, int, s, 64, 2, float, f, 32, 4, expected_q_xx_f32_4); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, float, f, 32, 4, expected_q_xx_f32_5); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 32, 4, expected_q_xx_f32_6); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 32, 4, expected_q_xx_f32_7); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 32, 4, expected_q_xx_f32_8); ++ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 32, 4, expected_q_xx_f32_9); ++ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 32, 4, expected_q_xx_f32_10); ++} + -+ /* Use input values where rounding produces a result equal to the -+ saturation value, but does not set the saturation flag. 
*/ -+#define TEST_MSG_ROUND " (check rounding)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8001); -+ VDUP(vector2, , int, s, 32, 2, 0x80000001); -+ VDUP(vector2, q, int, s, 16, 8, 0x8001); -+ VDUP(vector2, q, int, s, 32, 4, 0x80000001); ++int main (void) ++{ ++ exec_vreinterpret (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c +@@ -0,0 +1,200 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); -+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_round, TEST_MSG_ROUND); -+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_round, TEST_MSG_ROUND); -+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); ++/* Expected results for vrev16. */ ++VECT_VAR_DECL(expected_vrev16,int,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6 }; ++VECT_VAR_DECL(expected_vrev16,uint,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6 }; ++VECT_VAR_DECL(expected_vrev16,poly,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6 }; ++VECT_VAR_DECL(expected_vrev16,int,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6, ++ 0xf9, 0xf8, 0xfb, 0xfa, ++ 0xfd, 0xfc, 0xff, 0xfe }; ++VECT_VAR_DECL(expected_vrev16,uint,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6, ++ 0xf9, 0xf8, 0xfb, 0xfa, ++ 0xfd, 0xfc, 0xff, 0xfe }; ++VECT_VAR_DECL(expected_vrev16,poly,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6, ++ 0xf9, 0xf8, 0xfb, 0xfa, ++ 0xfd, 0xfc, 0xff, 0xfe }; ++ ++/* Expected results for vrev32. */ ++VECT_VAR_DECL(expected_vrev32,int,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4 }; ++VECT_VAR_DECL(expected_vrev32,int,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; ++VECT_VAR_DECL(expected_vrev32,uint,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4 }; ++VECT_VAR_DECL(expected_vrev32,uint,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; ++VECT_VAR_DECL(expected_vrev32,poly,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4 }; ++VECT_VAR_DECL(expected_vrev32,poly,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; ++VECT_VAR_DECL(expected_vrev32,int,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xfb, 0xfa, 0xf9, 0xf8, ++ 0xff, 0xfe, 0xfd, 0xfc }; ++VECT_VAR_DECL(expected_vrev32,int,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, ++ 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; ++VECT_VAR_DECL(expected_vrev32,uint,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xfb, 0xfa, 0xf9, 0xf8, ++ 0xff, 0xfe, 0xfd, 0xfc }; ++VECT_VAR_DECL(expected_vrev32,uint,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, ++ 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; ++VECT_VAR_DECL(expected_vrev32,poly,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xfb, 0xfa, 0xf9, 0xf8, ++ 0xff, 0xfe, 0xfd, 0xfc }; ++VECT_VAR_DECL(expected_vrev32,poly,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, ++ 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; ++ ++/* Expected results for vrev64. 
*/ ++VECT_VAR_DECL(expected_vrev64,int,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0 }; ++VECT_VAR_DECL(expected_vrev64,int,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; ++VECT_VAR_DECL(expected_vrev64,int,32,2) [] = { 0xfffffff1, 0xfffffff0 }; ++VECT_VAR_DECL(expected_vrev64,uint,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, ++ 0xf2, 0xf1, 0xf0 }; ++VECT_VAR_DECL(expected_vrev64,uint,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; ++VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; ++VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0 }; ++VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; ++VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; ++VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xff, 0xfe, 0xfd, 0xfc, ++ 0xfb, 0xfa, 0xf9, 0xf8 }; ++VECT_VAR_DECL(expected_vrev64,int,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, ++ 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; ++VECT_VAR_DECL(expected_vrev64,int,32,4) [] = { 0xfffffff1, 0xfffffff0, ++ 0xfffffff3, 0xfffffff2 }; ++VECT_VAR_DECL(expected_vrev64,uint,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xff, 0xfe, 0xfd, 0xfc, ++ 0xfb, 0xfa, 0xf9, 0xf8 }; ++VECT_VAR_DECL(expected_vrev64,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, ++ 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; ++VECT_VAR_DECL(expected_vrev64,uint,32,4) [] = { 0xfffffff1, 0xfffffff0, ++ 0xfffffff3, 0xfffffff2 }; ++VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xff, 0xfe, 0xfd, 0xfc, ++ 0xfb, 0xfa, 0xf9, 0xf8 }; ++VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, ++ 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; ++VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, ++ 0xc1500000, 0xc1600000 }; + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++void exec_vrev (void) ++{ ++ /* Basic test: y=vrev(x), then store the result. */ ++#define TEST_VREV(Q, T1, T2, W, N, W2) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrev##W2##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ clean_results (); ++ ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); ++ ++ /* Check vrev in each of the existing combinations. 
*/ ++#define TEST_MSG "VREV16" ++ TEST_VREV(, int, s, 8, 8, 16); ++ TEST_VREV(, uint, u, 8, 8, 16); ++ TEST_VREV(, poly, p, 8, 8, 16); ++ TEST_VREV(q, int, s, 8, 16, 16); ++ TEST_VREV(q, uint, u, 8, 16, 16); ++ TEST_VREV(q, poly, p, 8, 16, 16); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev16, ""); ++ ++#undef TEST_MSG ++#define TEST_MSG "VREV32" ++ TEST_VREV(, int, s, 8, 8, 32); ++ TEST_VREV(, int, s, 16, 4, 32); ++ TEST_VREV(, uint, u, 8, 8, 32); ++ TEST_VREV(, uint, u, 16, 4, 32); ++ TEST_VREV(, poly, p, 8, 8, 32); ++ TEST_VREV(, poly, p, 16, 4, 32); ++ TEST_VREV(q, int, s, 8, 16, 32); ++ TEST_VREV(q, int, s, 16, 8, 32); ++ TEST_VREV(q, uint, u, 8, 16, 32); ++ TEST_VREV(q, uint, u, 16, 8, 32); ++ TEST_VREV(q, poly, p, 8, 16, 32); ++ TEST_VREV(q, poly, p, 16, 8, 32); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev32, ""); ++ ++#undef TEST_MSG ++#define TEST_MSG "VREV64" ++ TEST_VREV(, int, s, 8, 8, 64); ++ TEST_VREV(, int, s, 16, 4, 64); ++ TEST_VREV(, int, s, 32, 2, 64); ++ TEST_VREV(, uint, u, 8, 8, 64); ++ TEST_VREV(, uint, u, 16, 4, 64); ++ TEST_VREV(, uint, u, 32, 2, 64); ++ TEST_VREV(, poly, p, 8, 8, 64); ++ TEST_VREV(, poly, p, 16, 4, 64); ++ TEST_VREV(q, int, s, 8, 16, 64); ++ TEST_VREV(q, int, s, 16, 8, 64); ++ TEST_VREV(q, int, s, 32, 4, 64); ++ TEST_VREV(q, uint, u, 8, 16, 64); ++ TEST_VREV(q, uint, u, 16, 8, 64); ++ TEST_VREV(q, uint, u, 32, 4, 64); ++ TEST_VREV(q, poly, p, 8, 16, 64); ++ TEST_VREV(q, poly, p, 16, 8, 64); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, ""); ++ ++ TEST_VREV(, float, f, 32, 2, 64); ++ TEST_VREV(q, float, f, 32, 4, 64); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, 
expected_vrev64, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_vrev64, ""); +} + +int main (void) +{ -+ exec_vqrdmulh (); ++ exec_vrev (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_lane.c -@@ -0,0 +1,169 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrshl.c +@@ -0,0 +1,627 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected results with input=0 and negative shift amount. */ ++VECT_VAR_DECL(expected_0_sh_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,64,2) [] = { 0x0, 0x0 }; + +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, ++ 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, ++ 0x4000, 0x5000, 0x6000, 0x7000 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; + -+/* Expected values of cumulative_saturation flag when multiplication -+ saturates. */ -+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++/* Expected results with negative shift amount. */ ++VECT_VAR_DECL(expected_sh_neg,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected_sh_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; ++VECT_VAR_DECL(expected_sh_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_sh_neg,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_sh_neg,uint,8,8) [] = { 0x78, 0x79, 0x79, 0x7a, ++ 0x7a, 0x7b, 0x7b, 0x7c }; ++VECT_VAR_DECL(expected_sh_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; ++VECT_VAR_DECL(expected_sh_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; ++VECT_VAR_DECL(expected_sh_neg,uint,64,1) [] = { 0xfffffffffffffff }; ++VECT_VAR_DECL(expected_sh_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2 }; ++VECT_VAR_DECL(expected_sh_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_sh_neg,uint,32,4) [] = { 0x80000, 0x80000, ++ 0x80000, 0x80000 }; ++VECT_VAR_DECL(expected_sh_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; ++ ++/* Expected results with max input value shifted by -1 to test ++ round_const. 
*/ ++VECT_VAR_DECL(expected_max_sh_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,16,4) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,16,8) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++ ++/* Expected results with max input value shifted by -3 to test ++ round_const. 
*/ ++VECT_VAR_DECL(expected_max_sh_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,16,4) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,16,8) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected results with negative shift by vector width. */ ++VECT_VAR_DECL(expected_max_sh_minus_width,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,64,2) [] = { 0x1, 0x1 }; + -+/* Expected results when multiplication saturates. 
*/ -+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++/* Expected results with large shift amount. */ ++VECT_VAR_DECL(expected_max_sh_large,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,64,2) [] = { 0x0, 0x0 }; + -+/* Expected values of cumulative_saturation flag when rounding -+ should not cause saturation. */ -+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++/* Expected results with large negative shift amount. 
*/ ++VECT_VAR_DECL(expected_max_sh_large_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,64,2) [] = { 0x1, 0x1 }; + -+/* Expected results when rounding should not cause saturation. */ -+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++#define TEST_MSG "VRSHL/VRSHLQ" ++void exec_vrshl (void) ++{ ++ /* Basic test: v3=vrshl(v1,v2), then store the result. */ ++#define TEST_VRSHL(T3, Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector_shift, T3, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+#define INSN vqrdmulh -+#define TEST_MSG "VQRDMULH_LANE" ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + -+FNNAME (INSN) -+{ -+ /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ -+#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N2), \ -+ L); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ clean_results (); + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Fill input vector with 0, to check behavior on limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); + -+#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Choose init value arbitrarily, will be used as shift amount. */ ++ /* Use values equal to one-less-than the type width to check ++ behaviour on limits. */ ++ VDUP(vector_shift, , int, s, 8, 8, 7); ++ VDUP(vector_shift, , int, s, 16, 4, 15); ++ VDUP(vector_shift, , int, s, 32, 2, 31); ++ VDUP(vector_shift, , int, s, 64, 1, 63); ++ VDUP(vector_shift, q, int, s, 8, 16, 7); ++ VDUP(vector_shift, q, int, s, 16, 8, 15); ++ VDUP(vector_shift, q, int, s, 32, 4, 31); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); ++ ++#define CMT " (with input = 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + ++#undef CMT ++#define CMT " (input 0 and negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_sh_neg, CMT); ++ ++ ++ /* Test again, with predefined input values. */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose init value arbitrarily, will be used as shift amount. */ ++ VDUP(vector_shift, , int, s, 8, 8, 1); ++ VDUP(vector_shift, , int, s, 16, 4, 3); ++ VDUP(vector_shift, , int, s, 32, 2, 8); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 12); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); ++ ++#undef CMT ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); ++ ++#undef CMT ++#define CMT " (negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh_neg, CMT); ++ ++ /* Fill input vector with max value, to check behavior on limits. */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + -+ DECL_VARIABLE(vector, int, 16, 4); -+ DECL_VARIABLE(vector, int, 32, 2); -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); ++ /* Use -1 shift amount to check overflow with round_const. */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -1); ++ VDUP(vector_shift, , int, s, 32, 2, -1); ++ VDUP(vector_shift, , int, s, 64, 1, -1); ++ VDUP(vector_shift, q, int, s, 8, 16, -1); ++ VDUP(vector_shift, q, int, s, 16, 8, -1); ++ VDUP(vector_shift, q, int, s, 32, 4, -1); ++ VDUP(vector_shift, q, int, s, 64, 2, -1); + -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, int, 16, 8); -+ DECL_VARIABLE(vector_res, int, 32, 4); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ /* vector2: vqrdmulh_lane and vqrdmulhq_lane have a 2nd argument with -+ the same number of elements, so we need only one variable of each -+ type. 
*/ -+ DECL_VARIABLE(vector2, int, 16, 4); -+ DECL_VARIABLE(vector2, int, 32, 2); ++#undef CMT ++#define CMT " (max input, shift by -1)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_minus1, CMT); + -+ clean_results (); ++ /* Use -3 shift amount to check overflow with round_const. */ ++ VDUP(vector_shift, , int, s, 8, 8, -3); ++ VDUP(vector_shift, , int, s, 16, 4, -3); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, -3); ++ VDUP(vector_shift, q, int, s, 16, 8, -3); ++ VDUP(vector_shift, q, int, s, 32, 4, -3); ++ VDUP(vector_shift, q, int, s, 64, 2, -3); + -+ VLOAD(vector, buffer, , int, s, 16, 4); -+ VLOAD(vector, buffer, , int, s, 32, 2); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); ++#undef CMT ++#define CMT " (check rounding constant: max input, shift by -3)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_minus3, CMT); ++ ++ ++ /* Use negative shift amount as large as input vector width. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -8); ++ VDUP(vector_shift, , int, s, 16, 4, -16); ++ VDUP(vector_shift, , int, s, 32, 2, -32); ++ VDUP(vector_shift, , int, s, 64, 1, -64); ++ VDUP(vector_shift, q, int, s, 8, 16, -8); ++ VDUP(vector_shift, q, int, s, 16, 8, -16); ++ VDUP(vector_shift, q, int, s, 32, 4, -32); ++ VDUP(vector_shift, q, int, s, 64, 2, -64); + -+ /* Initialize vector2. */ -+ VDUP(vector2, , int, s, 16, 4, 0x55); -+ VDUP(vector2, , int, s, 32, 2, 0xBB); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ /* Choose lane arbitrarily. */ -+#define CMT "" -+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT); ++#undef CMT ++#define CMT " (max input, right shift by vector width)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_minus_width, CMT); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + -+ /* Now use input values such that the multiplication causes -+ saturation. */ -+#define TEST_MSG_MUL " (check mul cumulative saturation)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8000); -+ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ /* Test large shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 10); ++ VDUP(vector_shift, , int, s, 16, 4, 20); ++ VDUP(vector_shift, , int, s, 32, 2, 33); ++ VDUP(vector_shift, , int, s, 64, 1, 65); ++ VDUP(vector_shift, q, int, s, 8, 16, 9); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); + -+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++#undef CMT ++#define CMT " (max input, large shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_large, CMT); + -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8001); -+ VDUP(vector2, , int, s, 32, 2, 0x80000001); ++ ++ /* Test large negative shift amount. */ ++ VDUP(vector_shift, , int, s, 8, 8, -10); ++ VDUP(vector_shift, , int, s, 16, 4, -20); ++ VDUP(vector_shift, , int, s, 32, 2, -33); ++ VDUP(vector_shift, , int, s, 64, 1, -65); ++ VDUP(vector_shift, q, int, s, 8, 16, -9); ++ VDUP(vector_shift, q, int, s, 16, 8, -16); ++ VDUP(vector_shift, q, int, s, 32, 4, -32); ++ VDUP(vector_shift, q, int, s, 64, 2, -64); + -+ /* Use input values where rounding produces a result equal to the -+ saturation value, but does not set the saturation flag. 
*/ -+#define TEST_MSG_ROUND " (check rounding)" -+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++#undef CMT ++#define CMT " (max input, large negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_large_neg, CMT); +} + +int main (void) +{ -+ exec_vqrdmulh_lane (); ++ exec_vrshl (); + return 0; +} -+ --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_n.c -@@ -0,0 +1,155 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrshr_n.c +@@ -0,0 +1,504 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+ +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5, -+ 0x5, 0x4, 0x4, 0x4 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, -+ 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, ++ 0x3d, 0x3d, 0x3e, 0x3e }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x8000000, 0x8000000 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x80000000 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc, ++ 0xfc, 0xfd, 0xfd, 0xfe, ++ 0xfe, 0xff, 0xff, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, ++ 0xfffffffd, 0xfffffffd }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3d, 0x3d, ++ 0x3d, 0x3d, 0x3e, 0x3e, ++ 0x3e, 0x3e, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x40, 0x40 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, ++ 0x1fff, 0x1fff, 0x1fff, 0x1fff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x8000000, 0x8000000, ++ 0x8000000, 0x8000000 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x80000000, 0x80000000 }; + -+/* Expected values of cumulative_saturation flag when multiplication -+ saturates. */ -+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++/* Expected results with maximum input and max shift amount. */ ++VECT_VAR_DECL(expected_max_sh_max,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with maximum input and shift by 1. 
*/ ++VECT_VAR_DECL(expected_max_sh_1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_1,int,16,4) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_1,int,16,8) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; + -+/* Expected results when multiplication saturates. */ -+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++/* Expected results with maximum input and shift by 3. 
*/ ++VECT_VAR_DECL(expected_max_sh_3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_3,int,16,4) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_3,int,16,8) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected results with max negative input (for signed types, shift ++ by 1. */ ++VECT_VAR_DECL(expected_max_neg_sh_1,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,16,4) [] = { 0xc000, 0xc000, ++ 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,32,2) [] = { 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,64,1) [] = { 0xc000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,16,8) [] = { 0xc000, 0xc000, ++ 0xc000, 0xc000, ++ 0xc000, 0xc000, ++ 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,32,4) [] = { 0xc0000000, 0xc0000000, ++ 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,64,2) [] = { 0xc000000000000000, ++ 0xc000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++ ++/* Expected results with max negative input (for signed types, shift ++ by 3. 
*/ ++VECT_VAR_DECL(expected_max_neg_sh_3,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,16,4) [] = { 0xf000, 0xf000, ++ 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,32,2) [] = { 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,64,1) [] = { 0xf000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,16,8) [] = { 0xf000, 0xf000, ++ 0xf000, 0xf000, ++ 0xf000, 0xf000, ++ 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,32,4) [] = { 0xf0000000, 0xf0000000, ++ 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,64,2) [] = { 0xf000000000000000, ++ 0xf000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; + -+/* Expected values of cumulative_saturation flag when rounding -+ should not cause saturation. */ -+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++#define TEST_MSG "VRSHR_N" ++void exec_vrshr_n (void) ++{ ++ /* Basic test: y=vrshr_n(x,v), then store the result. */ ++#define TEST_VRSHR_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+/* Expected results when rounding should not cause saturation. */ -+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+#define INSN vqrdmulh -+#define TEST_MSG "VQRDMULH_N" ++ clean_results (); + -+#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+FNNAME (INSN) -+{ -+ int i; ++ /* Choose shift amount arbitrarily. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 1); ++ TEST_VRSHR_N(, int, s, 16, 4, 12); ++ TEST_VRSHR_N(, int, s, 32, 2, 2); ++ TEST_VRSHR_N(, int, s, 64, 1, 32); ++ TEST_VRSHR_N(, uint, u, 8, 8, 2); ++ TEST_VRSHR_N(, uint, u, 16, 4, 3); ++ TEST_VRSHR_N(, uint, u, 32, 2, 5); ++ TEST_VRSHR_N(, uint, u, 64, 1, 33); ++ ++ TEST_VRSHR_N(q, int, s, 8, 16, 1); ++ TEST_VRSHR_N(q, int, s, 16, 8, 12); ++ TEST_VRSHR_N(q, int, s, 32, 4, 2); ++ TEST_VRSHR_N(q, int, s, 64, 2, 32); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 2); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 5); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 33); + -+ /* vector_res = vqrdmulh_n(vector,val), then store the result. */ -+#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ L); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Use maximum positive input value. */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ /* Use max shift amount, to exercise saturation. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 8); ++ TEST_VRSHR_N(, int, s, 16, 4, 16); ++ TEST_VRSHR_N(, int, s, 32, 2, 32); ++ TEST_VRSHR_N(, int, s, 64, 1, 64); ++ TEST_VRSHR_N(, uint, u, 8, 8, 8); ++ TEST_VRSHR_N(, uint, u, 16, 4, 16); ++ TEST_VRSHR_N(, uint, u, 32, 2, 32); ++ TEST_VRSHR_N(, uint, u, 64, 1, 64); ++ TEST_VRSHR_N(q, int, s, 8, 16, 8); ++ TEST_VRSHR_N(q, int, s, 16, 8, 16); ++ TEST_VRSHR_N(q, int, s, 32, 4, 32); ++ TEST_VRSHR_N(q, int, s, 64, 2, 64); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 8); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 16); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 32); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 64); ++ ++#undef CMT ++#define CMT " (overflow test: max shift amount, max positive input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_max, CMT); ++ ++ ++ /* Use 1 as shift amount, to exercise saturation. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 1); ++ TEST_VRSHR_N(, int, s, 16, 4, 1); ++ TEST_VRSHR_N(, int, s, 32, 2, 1); ++ TEST_VRSHR_N(, int, s, 64, 1, 1); ++ TEST_VRSHR_N(, uint, u, 8, 8, 1); ++ TEST_VRSHR_N(, uint, u, 16, 4, 1); ++ TEST_VRSHR_N(, uint, u, 32, 2, 1); ++ TEST_VRSHR_N(, uint, u, 64, 1, 1); ++ TEST_VRSHR_N(q, int, s, 8, 16, 1); ++ TEST_VRSHR_N(q, int, s, 16, 8, 1); ++ TEST_VRSHR_N(q, int, s, 32, 4, 1); ++ TEST_VRSHR_N(q, int, s, 64, 2, 1); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 1); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 1); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 1); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 1); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 1, with max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_1, CMT); ++ ++ ++ /* Use 3 as shift amount, to exercise saturation. */ ++ TEST_VRSHR_N(, int, s, 8, 8, 3); ++ TEST_VRSHR_N(, int, s, 16, 4, 3); ++ TEST_VRSHR_N(, int, s, 32, 2, 3); ++ TEST_VRSHR_N(, int, s, 64, 1, 3); ++ TEST_VRSHR_N(, uint, u, 8, 8, 3); ++ TEST_VRSHR_N(, uint, u, 16, 4, 3); ++ TEST_VRSHR_N(, uint, u, 32, 2, 3); ++ TEST_VRSHR_N(, uint, u, 64, 1, 3); ++ TEST_VRSHR_N(q, int, s, 8, 16, 3); ++ TEST_VRSHR_N(q, int, s, 16, 8, 3); ++ TEST_VRSHR_N(q, int, s, 32, 4, 3); ++ TEST_VRSHR_N(q, int, s, 64, 2, 3); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 3); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 3); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 3); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 3, with max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_3, CMT); ++ ++ ++ /* Use minimum negative input for signed types. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0x80); ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x80); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ ++ /* Use 1 as shift amount, to exercise saturation code. */ ++ TEST_VRSHR_N(, int, s, 8, 8, 1); ++ TEST_VRSHR_N(, int, s, 16, 4, 1); ++ TEST_VRSHR_N(, int, s, 32, 2, 1); ++ TEST_VRSHR_N(, int, s, 64, 1, 1); ++ TEST_VRSHR_N(, uint, u, 8, 8, 1); ++ TEST_VRSHR_N(, uint, u, 16, 4, 1); ++ TEST_VRSHR_N(, uint, u, 32, 2, 1); ++ TEST_VRSHR_N(, uint, u, 64, 1, 1); ++ TEST_VRSHR_N(q, int, s, 8, 16, 1); ++ TEST_VRSHR_N(q, int, s, 16, 8, 1); ++ TEST_VRSHR_N(q, int, s, 32, 4, 1); ++ TEST_VRSHR_N(q, int, s, 64, 2, 1); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 1); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 1); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 1); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 1); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 1, with negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_neg_sh_1, CMT); ++ ++ ++ /* Use 3 as shift amount, to exercise saturation code. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 3); ++ TEST_VRSHR_N(, int, s, 16, 4, 3); ++ TEST_VRSHR_N(, int, s, 32, 2, 3); ++ TEST_VRSHR_N(, int, s, 64, 1, 3); ++ TEST_VRSHR_N(, uint, u, 8, 8, 3); ++ TEST_VRSHR_N(, uint, u, 16, 4, 3); ++ TEST_VRSHR_N(, uint, u, 32, 2, 3); ++ TEST_VRSHR_N(, uint, u, 64, 1, 3); ++ TEST_VRSHR_N(q, int, s, 8, 16, 3); ++ TEST_VRSHR_N(q, int, s, 16, 8, 3); ++ TEST_VRSHR_N(q, int, s, 32, 4, 3); ++ TEST_VRSHR_N(q, int, s, 64, 2, 3); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 3); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 3); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 3); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 3, with negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_neg_sh_3, CMT); ++} + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++int main (void) ++{ ++ exec_vrshr_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrshrn_n.c +@@ -0,0 +1,143 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; + ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, ++ 0xfd, 0xfd, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++ ++/* Expected results with large shift amount. 
*/ ++VECT_VAR_DECL(expected_sh_large,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,uint,32,2) [] = { 0x0, 0x0 }; + -+ DECL_VARIABLE(vector, int, 16, 4); -+ DECL_VARIABLE(vector, int, 32, 2); ++#define TEST_MSG "VRSHRN_N" ++void exec_vrshrn_n (void) ++{ ++ /* Basic test: v2=vrshrn_n(v1,v), then store the result. */ ++#define TEST_VRSHRN_N(T1, T2, W, N, W2, V) \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ vrshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) ++ ++ /* vector is twice as large as vector_res. */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); + ++ DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, int, 16, 8); -+ DECL_VARIABLE(vector_res, int, 32, 4); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + -+ VLOAD(vector, buffer, , int, s, 16, 4); -+ VLOAD(vector, buffer, , int, s, 32, 2); ++ /* Fill input vector with 0, to check behavior on limits. */ ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); ++ ++ /* Choose shift amount arbitrarily. */ ++ TEST_VRSHRN_N(int, s, 16, 8, 8, 1); ++ TEST_VRSHRN_N(int, s, 32, 4, 16, 1); ++ TEST_VRSHRN_N(int, s, 64, 2, 32, 2); ++ TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); ++ TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); ++ TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); ++ ++#define CMT " (with input = 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ ++ ++ /* Test again, with predefined input values. */ + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); + -+ /* Choose multiplier arbitrarily. */ -+#define CMT "" -+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456, expected_cumulative_sat, CMT); ++ /* Choose shift amount arbitrarily. 
*/ ++ TEST_VRSHRN_N(int, s, 16, 8, 8, 1); ++ TEST_VRSHRN_N(int, s, 32, 4, 16, 1); ++ TEST_VRSHRN_N(int, s, 64, 2, 32, 2); ++ TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); ++ TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); ++ TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + ++#undef CMT ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); + CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); -+ -+ /* Now use input values such that the multiplication causes -+ saturation. */ -+#define TEST_MSG_MUL " (check mul cumulative saturation)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ -+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); + -+ /* Use input values where rounding produces a result equal to the -+ saturation value, but does not set the saturation flag. */ -+#define TEST_MSG_ROUND " (check rounding)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ /* Fill input arbitrary values. */ ++ VDUP(vector, q, int, s, 16, 8, 30); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFF0); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFF0); ++ VDUP(vector, q, uint, u, 64, 2, 0); + -+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); ++ /* Choose large shift amount arbitrarily. 
*/ ++ TEST_VRSHRN_N(int, s, 16, 8, 8, 7); ++ TEST_VRSHRN_N(int, s, 32, 4, 16, 14); ++ TEST_VRSHRN_N(int, s, 64, 2, 32, 31); ++ TEST_VRSHRN_N(uint, u, 16, 8, 8, 7); ++ TEST_VRSHRN_N(uint, u, 32, 4, 16, 16); ++ TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++#undef CMT ++#define CMT " (with large shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh_large, CMT); +} + +int main (void) +{ -+ exec_vqrdmulh_n (); ++ exec_vrshrn_n (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshl.c -@@ -0,0 +1,1090 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c +@@ -0,0 +1,157 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" ++#include + -+/* Expected values of cumulative_saturation flag with input=0. */ -+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++/* Expected results. */ ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x9c800000, 0x9c800000, ++ 0x9c800000, 0x9c800000 }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3e498000, 0x3e498000 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3e700000, 0x3e700000, ++ 0x3e700000, 0x3e700000 }; ++ ++/* Expected results with large uint #1. */ ++VECT_VAR_DECL(expected_1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_1,uint,32,4) [] = { 0xae800000, 0xae800000, ++ 0xae800000, 0xae800000 }; ++ ++/* Expected results with large uint #2. */ ++VECT_VAR_DECL(expected_2,uint,32,2) [] = { 0xb4800000, 0xb4800000 }; ++VECT_VAR_DECL(expected_2,uint,32,4) [] = { 0xed000000, 0xed000000, ++ 0xed000000, 0xed000000 }; ++ ++/* Expected results with FP special inputs values (NaNs, ...). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, ++ 0x7f800000, 0x7f800000 }; ++ ++/* Expected results with FP special inputs values ++ (negative, infinity). 
*/ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results with FP special inputs values ++ (-0, -infinity). */ ++VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, ++ 0x7fc00000, 0x7fc00000 }; + -+/* Expected results with input=0. */ -+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++#define TEST_MSG "VRSQRTE/VRSQRTEQ" ++void exec_vrsqrte(void) ++{ ++ int i; + -+/* Expected values of cumulative_saturation flag with input=0 and -+ negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++ /* Basic test: y=vrsqrte(x), then store the result. */ ++#define TEST_VRSQRTE(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrsqrte##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) + -+/* Expected results with input=0 and negative shift amount. 
*/ -+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++ DECL_VARIABLE(vector, uint, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, float, 32, 4); ++ ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 32, 4); ++ DECL_VARIABLE(vector_res, float, 32, 4); ++ ++ clean_results (); ++ ++ /* Choose init value arbitrarily. */ ++ VDUP(vector, , uint, u, 32, 2, 0x12345678); ++ VDUP(vector, , float, f, 32, 2, 25.799999f); ++ VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); ++ VDUP(vector, q, float, f, 32, 4, 18.2f); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, uint, u, 32, 2); ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, uint, u, 32, 4); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#define CMT "" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); ++ ++ ++ /* Don't test FP variants with negative inputs. */ ++ /* Use input with various values of bits 30 and 31. */ ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0x89081234); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, uint, u, 32, 2); ++ TEST_VRSQRTE(q, uint, u, 32, 4); ++ ++#undef CMT ++#define CMT " (large uint #1)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_1, CMT); ++ ++ ++ /* Choose init value arbitrarily. */ ++ VDUP(vector, , uint, u, 32, 2, 0x80000000); ++ VDUP(vector, q, uint, u, 32, 4, 0x4ABCDEF0); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, uint, u, 32, 2); ++ TEST_VRSQRTE(q, uint, u, 32, 4); ++ ++#undef CMT ++#define CMT " (large uint #2)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_2, CMT); ++ ++ ++ /* Test FP variants with special input values (NaNs, ...). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ ++ /* Apply the operator. 
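
The expected_fp1/expected_fp2/expected_fp3 tables above follow the usual IEEE special-value behaviour of a reciprocal square-root estimate: NaN and negative inputs produce the default quiet NaN (0x7fc00000), +0.0 produces +Inf (0x7f800000), -0.0 produces -Inf (0xff800000), and +Inf produces +0.0. A scalar analogue (illustrative only, not part of the patch) shows the same pattern with 1.0f/sqrtf():

#include <math.h>
#include <stdio.h>

/* The same special inputs the test uses: NaN, +0, a negative value,
   +Inf, -0 and -Inf.  1/sqrt follows the IEEE rules the vector
   estimate is expected to follow.  */
int
main (void)
{
  const float in[] = { NAN, 0.0f, -1.0f, HUGE_VALF, -0.0f, -HUGE_VALF };
  for (unsigned i = 0; i < sizeof in / sizeof in[0]; i++)
    printf ("1/sqrt(%g) = %g\n", (double) in[i], (double) (1.0f / sqrtf (in[i])));
  return 0;
}
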
*/ ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (NaN, 0)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ ++ /* Test FP variants with special input values (negative, infinity). */ ++ VDUP(vector, , float, f, 32, 2, -1.0f); ++ VDUP(vector, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (negative, infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); ++ ++ /* Test FP variants with special input values (-0, -infinity). */ ++ VDUP(vector, , float, f, 32, 2, -0.0f); ++ VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (-0, -infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); ++} + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++int main (void) ++{ ++ exec_vrsqrte (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c +@@ -0,0 +1,118 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++#include + +/* Expected results. 
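
For the vrsqrts test introduced here, the expected bit patterns can be reproduced from the Newton-Raphson step term the instruction computes, result = (3 - a*b) / 2 (assuming the standard VRSQRTS definition). With the test's 12.9f and 9.9f inputs that is roughly -62.355, i.e. 0xc2796b84, the first expected,hfloat,32,2 entry. A minimal scalar sketch, illustrative only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Scalar model of the VRSQRTS step applied to the init values used by
   the 64-bit variant of the test (12.9 and 9.9).  */
int
main (void)
{
  float a = 12.9f, b = 9.9f;
  float r = (3.0f - a * b) / 2.0f;
  uint32_t bits;

  memcpy (&bits, &r, sizeof bits);
  printf ("vrsqrts step = %f (0x%08x)\n", (double) r, bits);
  return 0;
}
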
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, -+ 0xe8, 0xea, 0xec, 0xee }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, -+ 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2796b84, 0xc2796b84 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0e4a3d8, 0xc0e4a3d8, ++ 0xc0e4a3d8, 0xc0e4a3d8 }; ++ ++/* Expected results with input=NaN. */ ++VECT_VAR_DECL(expected_nan,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, ++ 0x7fc00000, 0x7fc00000 }; ++ ++/* Expected results with FP special inputs values (infinity, 0). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, ++ 0x3fc00000, 0x3fc00000 }; ++ ++/* Expected results with only FP special inputs values (infinity, ++ 0). */ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x3fc00000, 0x3fc00000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, ++ 0x3fc00000, 0x3fc00000 }; + -+/* Expected values of cumulative_saturation flag with negative shift -+ amount. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++#define TEST_MSG "VRSQRTS/VRSQRTSQ" ++void exec_vrsqrts(void) ++{ ++ int i; + -+/* Expected results with negative shift amount. 
*/ -+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, -+ 0xfd, 0xfd, 0xfe, 0xfe }; -+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; -+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, -+ 0x3d, 0x3d, 0x3e, 0x3e }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; -+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; -+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, -+ 0x2, 0x2, 0x2, 0x2, -+ 0x2, 0x2, 0x2, 0x2, -+ 0x2, 0x2, 0x2, 0x2 }; -+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20 }; -+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x80000, 0x80000, -+ 0x80000, 0x80000 }; -+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; ++ /* Basic test: y=vrsqrts(x), then store the result. */ ++#define TEST_VRSQRTS(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrsqrts##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) ++ ++ /* No need for integer variants. */ ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 4); + -+/* Expected values of cumulative_saturation flag with input=max and -+ shift by -1. */ -+int VECT_VAR(expected_cumulative_sat_minus1,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,2) = 0; ++ DECL_VARIABLE(vector2, float, 32, 2); ++ DECL_VARIABLE(vector2, float, 32, 4); + -+/* Expected results with input=max and shift by -1. 
*/ -+VECT_VAR_DECL(expected_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, -+ 0x40, 0x40, 0x40, 0x40 }; -+VECT_VAR_DECL(expected_minus1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; -+VECT_VAR_DECL(expected_minus1,int,32,2) [] = { 0x40000000, 0x40000000 }; -+VECT_VAR_DECL(expected_minus1,int,64,1) [] = { 0x4000000000000000 }; -+VECT_VAR_DECL(expected_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_minus1,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_minus1,uint,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, -+ 0x40, 0x40, 0x40, 0x40, -+ 0x40, 0x40, 0x40, 0x40, ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 4); ++ ++ clean_results (); ++ ++ /* Choose init value arbitrarily. */ ++ VDUP(vector, , float, f, 32, 2, 12.9f); ++ VDUP(vector, q, float, f, 32, 4, 9.1f); ++ ++ VDUP(vector2, , float, f, 32, 2, 9.9f); ++ VDUP(vector2, q, float, f, 32, 4, 1.9f); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#define CMT "" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); ++ ++ ++ /* Test FP variants with special input values (NaN). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector2, q, float, f, 32, 4, NAN); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (NAN) and normal values" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_nan, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, CMT); ++ ++ ++ /* Test FP variants with special input values (infinity, 0). */ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ /* Restore a normal value in vector2. */ ++ VDUP(vector2, q, float, f, 32, 4, 3.2f); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (infinity, 0) and normal values" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ ++ /* Test FP variants with only special input values (infinity, 0). */ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ VDUP(vector2, , float, f, 32, 2, -0.0f); ++ VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " only FP special (infinity, 0)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); ++} ++ ++int main (void) ++{ ++ exec_vrsqrts (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsra_n.c +@@ -0,0 +1,553 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf9, 0xfa, 0xfb, 0xfc, ++ 0xfd, 0xfe, 0xff, 0x0 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffd, 0xfffffffe }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffd, 0xfffe, 0xffff, 0x0 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xfa, 0xfb, 0xfc, ++ 0xfd, 0xfe, 0xff, 0x0, ++ 0x1, 0x2, 0x3, 0x4, ++ 0x5, 0x6, 0x7, 0x8 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffe, ++ 0xffffffff, 0x0 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc, ++ 0xd, 0xe, 0xf, 0x10, ++ 0x11, 0x12, 0x13, 0x14 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, ++ 0x1, 0x2, 0x3, 0x4 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, ++ 0xfffffff6, 0xfffffff7 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++ ++/* Expected results with max input and shift by 1. */ ++VECT_VAR_DECL(expected_max_sh1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40 }; -+VECT_VAR_DECL(expected_minus1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, -+ 0x4000, 0x4000, 0x4000, 0x4000 }; -+VECT_VAR_DECL(expected_minus1,int,32,4) [] = { 0x40000000, 0x40000000, -+ 0x40000000, 0x40000000 }; -+VECT_VAR_DECL(expected_minus1,int,64,2) [] = { 0x4000000000000000, -+ 0x4000000000000000 }; -+VECT_VAR_DECL(expected_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, ++VECT_VAR_DECL(expected_max_sh1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_minus1,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, -+ 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_minus1,uint,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_minus1,uint,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+ -+/* Expected values of cumulative_saturation flag with input=max and -+ shift by -3. 
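
These vrsra_n expectations follow from the rounding rule of the instruction: each addend lane is shifted right after adding a rounding constant of 1 << (shift - 1), then accumulated into the first operand. For the first int8x8 lane the accumulator loaded from buffer starts at 0xf0 (-16), the addend is 0x11 (17) and the shift count is 1, so -16 + ((17 + 1) >> 1) = -7 = 0xf9, the first value in expected. A scalar sketch (the helper name is made up for illustration, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Scalar model of vrsra_n: accumulate a rounded right shift.  */
static int8_t
rsra_n_s8 (int8_t acc, int8_t x, int n)
{
  return (int8_t) (acc + ((x + (1 << (n - 1))) >> n));
}

int
main (void)
{
  printf ("0x%02x\n", (uint8_t) rsra_n_s8 (-16, 0x11, 1));   /* prints 0xf9 */
  return 0;
}
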
*/ -+int VECT_VAR(expected_cumulative_sat_minus3,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,2) = 0; ++VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_sh1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, ++ 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; + -+/* Expected results with input=max and shift by -3. */ -+VECT_VAR_DECL(expected_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, -+ 0x10, 0x10, 0x10, 0x10 }; -+VECT_VAR_DECL(expected_minus3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; -+VECT_VAR_DECL(expected_minus3,int,32,2) [] = { 0x10000000, 0x10000000 }; -+VECT_VAR_DECL(expected_minus3,int,64,1) [] = { 0x1000000000000000 }; -+VECT_VAR_DECL(expected_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20 }; -+VECT_VAR_DECL(expected_minus3,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; -+VECT_VAR_DECL(expected_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 }; -+VECT_VAR_DECL(expected_minus3,uint,64,1) [] = { 0x2000000000000000 }; -+VECT_VAR_DECL(expected_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, -+ 0x10, 0x10, 0x10, 0x10, -+ 0x10, 0x10, 0x10, 0x10, ++/* Expected results with max input and shift by 3. 
*/ ++VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10 }; -+VECT_VAR_DECL(expected_minus3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, -+ 0x1000, 0x1000, 0x1000, 0x1000 }; -+VECT_VAR_DECL(expected_minus3,int,32,4) [] = { 0x10000000, 0x10000000, -+ 0x10000000, 0x10000000 }; -+VECT_VAR_DECL(expected_minus3,int,64,2) [] = { 0x1000000000000000, -+ 0x1000000000000000 }; -+VECT_VAR_DECL(expected_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20, ++VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20 }; -+VECT_VAR_DECL(expected_minus3,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, -+ 0x2000, 0x2000, 0x2000, 0x2000 }; -+VECT_VAR_DECL(expected_minus3,uint,32,4) [] = { 0x20000000, 0x20000000, -+ 0x20000000, 0x20000000 }; -+VECT_VAR_DECL(expected_minus3,uint,64,2) [] = { 0x2000000000000000, -+ 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, ++ 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected results with max input and shift by type width. 
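
When the shift count equals the element width, only the rounding increment can survive the shift, so the expected_max_shmax tables come out as 0x0 for the signed maxima (0x7f, 0x7fff, ...) and 0x1 for the unsigned maxima (0xff, 0xffff, ...). Illustrative arithmetic for the 8-bit case (not part of the patch):

#include <stdio.h>

/* Rounded right shift by the full element width: signed max 0x7f gives
   (127 + 128) >> 8 == 0, unsigned max 0xff gives (255 + 128) >> 8 == 1.  */
int
main (void)
{
  int n = 8;
  printf ("%d %d\n", (0x7f + (1 << (n - 1))) >> n, (0xff + (1 << (n - 1))) >> n);
  return 0;
}
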
*/ ++VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with min negative input and shift by 1. */ ++VECT_VAR_DECL(expected_min_sh1,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_min_sh1,int,16,4) [] = { 0xc000, 0xc000, 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_min_sh1,int,32,2) [] = { 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_min_sh1,int,64,1) [] = { 0xc000000000000000 }; ++VECT_VAR_DECL(expected_min_sh1,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_min_sh1,int,16,8) [] = { 0xc000, 0xc000, 0xc000, 0xc000, ++ 0xc000, 0xc000, 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_min_sh1,int,32,4) [] = { 0xc0000000, 0xc0000000, ++ 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_min_sh1,int,64,2) [] = { 0xc000000000000000, ++ 0xc000000000000000 }; ++VECT_VAR_DECL(expected_min_sh1,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with min negative input and shift by 3. 
*/ ++VECT_VAR_DECL(expected_min_sh3,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_min_sh3,int,16,4) [] = { 0xf000, 0xf000, 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_min_sh3,int,32,2) [] = { 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_min_sh3,int,64,1) [] = { 0xf000000000000000 }; ++VECT_VAR_DECL(expected_min_sh3,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_min_sh3,int,16,8) [] = { 0xf000, 0xf000, 0xf000, 0xf000, ++ 0xf000, 0xf000, 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_min_sh3,int,32,4) [] = { 0xf0000000, 0xf0000000, ++ 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_min_sh3,int,64,2) [] = { 0xf000000000000000, ++ 0xf000000000000000 }; ++VECT_VAR_DECL(expected_min_sh3,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with min negative input and shift by type width. */ ++VECT_VAR_DECL(expected_min_shmax,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,64,2) [] = { 0x1, 0x1 }; ++ ++#define TEST_MSG "VRSRA_N" ++void exec_vrsra_n (void) ++{ ++ /* Basic test: y=vrsra_n(x,v), then store the result. */ ++#define TEST_VRSRA_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector2); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ clean_results (); ++ ++ /* Initialize input "vector" from "buffer". 
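
For reference, one TEST_VRSRA_N instantiation boils down to a plain intrinsic call like the sketch below (illustrative only; the function name is made up and this is not part of the patch):

#include <arm_neon.h>

/* Roughly what TEST_VRSRA_N(, int, s, 8, 8, 1) expands to: load the
   accumulator and the addend, do the rounded shift-and-accumulate,
   store the result.  */
void
rsra_example (int8_t *out, const int8_t *acc, const int8_t *addend)
{
  int8x8_t a = vld1_s8 (acc);
  int8x8_t b = vld1_s8 (addend);
  int8x8_t r = vrsra_n_s8 (a, b, 1);   /* per lane: a + rounded (b >> 1) */
  vst1_s8 (out, r);
}
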
*/ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose arbitrary initialization values. */ ++ VDUP(vector2, , int, s, 8, 8, 0x11); ++ VDUP(vector2, , int, s, 16, 4, 0x22); ++ VDUP(vector2, , int, s, 32, 2, 0x33); ++ VDUP(vector2, , int, s, 64, 1, 0x44); ++ VDUP(vector2, , uint, u, 8, 8, 0x55); ++ VDUP(vector2, , uint, u, 16, 4, 0x66); ++ VDUP(vector2, , uint, u, 32, 2, 0x77); ++ VDUP(vector2, , uint, u, 64, 1, 0x88); ++ ++ VDUP(vector2, q, int, s, 8, 16, 0x11); ++ VDUP(vector2, q, int, s, 16, 8, 0x22); ++ VDUP(vector2, q, int, s, 32, 4, 0x33); ++ VDUP(vector2, q, int, s, 64, 2, 0x44); ++ VDUP(vector2, q, uint, u, 8, 16, 0x55); ++ VDUP(vector2, q, uint, u, 16, 8, 0x66); ++ VDUP(vector2, q, uint, u, 32, 4, 0x77); ++ VDUP(vector2, q, uint, u, 64, 2, 0x88); ++ ++ /* Choose shift amount arbitrarily. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 1); ++ TEST_VRSRA_N(, int, s, 16, 4, 12); ++ TEST_VRSRA_N(, int, s, 32, 2, 2); ++ TEST_VRSRA_N(, int, s, 64, 1, 32); ++ TEST_VRSRA_N(, uint, u, 8, 8, 2); ++ TEST_VRSRA_N(, uint, u, 16, 4, 3); ++ TEST_VRSRA_N(, uint, u, 32, 2, 5); ++ TEST_VRSRA_N(, uint, u, 64, 1, 33); ++ ++ TEST_VRSRA_N(q, int, s, 8, 16, 1); ++ TEST_VRSRA_N(q, int, s, 16, 8, 12); ++ TEST_VRSRA_N(q, int, s, 32, 4, 2); ++ TEST_VRSRA_N(q, int, s, 64, 2, 32); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 2); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 3); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 5); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 33); ++ ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); + -+/* Expected values of cumulative_saturation flag with input=max and -+ large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,2) = 1; + -+/* Expected results with input=max and large shift amount. 
*/ -+VECT_VAR_DECL(expected_large_sh,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_large_sh,int,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_large_sh,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_large_sh,int,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_large_sh,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_large_sh,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_large_sh,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_large_sh,int,16,8) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_large_sh,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_large_sh,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_large_sh,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_large_sh,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ /* Initialize the accumulator with 0. */ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); + -+/* Expected values of cumulative_saturation flag with negative input and -+ large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,2) = 1; ++ /* Initialize with max values to check overflow. 
*/ ++ VDUP(vector2, , int, s, 8, 8, 0x7F); ++ VDUP(vector2, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector2, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector2, , uint, u, 8, 8, 0xFF); ++ VDUP(vector2, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector2, q, int, s, 8, 16, 0x7F); ++ VDUP(vector2, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector2, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector2, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ /* Shift by 1 to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 1); ++ TEST_VRSRA_N(, int, s, 16, 4, 1); ++ TEST_VRSRA_N(, int, s, 32, 2, 1); ++ TEST_VRSRA_N(, int, s, 64, 1, 1); ++ TEST_VRSRA_N(, uint, u, 8, 8, 1); ++ TEST_VRSRA_N(, uint, u, 16, 4, 1); ++ TEST_VRSRA_N(, uint, u, 32, 2, 1); ++ TEST_VRSRA_N(, uint, u, 64, 1, 1); ++ TEST_VRSRA_N(q, int, s, 8, 16, 1); ++ TEST_VRSRA_N(q, int, s, 16, 8, 1); ++ TEST_VRSRA_N(q, int, s, 32, 4, 1); ++ TEST_VRSRA_N(q, int, s, 64, 2, 1); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 1); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 1); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 1); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 1); + -+/* Expected results with negative input and large shift amount. */ -+VECT_VAR_DECL(expected_neg_large_sh,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,16,4) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,32,2) [] = { 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,32,2) [] = { 0xffffffff, -+ 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg_large_sh,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,16,8) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,32,4) [] = { 0xffffffff, -+ 0xffffffff, -+ 0xffffffff, -+ 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++#undef CMT ++#define CMT " (checking overflow: shift by 1, max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, 
expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh1, CMT); ++ ++ ++ /* Shift by 3 to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 3); ++ TEST_VRSRA_N(, int, s, 16, 4, 3); ++ TEST_VRSRA_N(, int, s, 32, 2, 3); ++ TEST_VRSRA_N(, int, s, 64, 1, 3); ++ TEST_VRSRA_N(, uint, u, 8, 8, 3); ++ TEST_VRSRA_N(, uint, u, 16, 4, 3); ++ TEST_VRSRA_N(, uint, u, 32, 2, 3); ++ TEST_VRSRA_N(, uint, u, 64, 1, 3); ++ TEST_VRSRA_N(q, int, s, 8, 16, 3); ++ TEST_VRSRA_N(q, int, s, 16, 8, 3); ++ TEST_VRSRA_N(q, int, s, 32, 4, 3); ++ TEST_VRSRA_N(q, int, s, 64, 2, 3); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 3); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 3); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 3); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 3); + -+/* Expected values of cumulative_saturation flag with max/min input and -+ large negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,2) = 0; ++#undef CMT ++#define CMT " (checking overflow: shift by 3, max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh3, CMT); ++ 
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh3, CMT); ++ ++ ++ /* Shift by max to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 8); ++ TEST_VRSRA_N(, int, s, 16, 4, 16); ++ TEST_VRSRA_N(, int, s, 32, 2, 32); ++ TEST_VRSRA_N(, int, s, 64, 1, 64); ++ TEST_VRSRA_N(, uint, u, 8, 8, 8); ++ TEST_VRSRA_N(, uint, u, 16, 4, 16); ++ TEST_VRSRA_N(, uint, u, 32, 2, 32); ++ TEST_VRSRA_N(, uint, u, 64, 1, 64); ++ TEST_VRSRA_N(q, int, s, 8, 16, 8); ++ TEST_VRSRA_N(q, int, s, 16, 8, 16); ++ TEST_VRSRA_N(q, int, s, 32, 4, 32); ++ TEST_VRSRA_N(q, int, s, 64, 2, 64); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 8); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 16); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 32); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 64); + -+/* Expected results with max/min input and large negative shift amount. */ -+VECT_VAR_DECL(expected_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++#undef CMT ++#define CMT " (checking overflow: shift by max, max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_shmax, CMT); + -+/* Expected values of cumulative_saturation flag with input=0 and -+ large 
negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,2) = 0; + -+/* Expected results with input=0 and large negative shift amount. */ -+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++ /* Initialize with min values to check overflow. */ ++ VDUP(vector2, , int, s, 8, 8, 0x80); ++ VDUP(vector2, , int, s, 16, 4, 0x8000); ++ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ VDUP(vector2, , int, s, 64, 1, 0x8000000000000000LL); ++ VDUP(vector2, q, int, s, 8, 16, 0x80); ++ VDUP(vector2, q, int, s, 16, 8, 0x8000); ++ VDUP(vector2, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000ULL); + -+#define INSN vqrshl -+#define TEST_MSG "VQRSHL/VQRSHLQ" ++ /* Shift by 1 to check overflow with rounding constant. 
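
For the most negative signed input 0x80 (-128) the same rounding rule gives 0xc0 after a shift of 1, 0xf0 after a shift of 3 and 0x0 after a shift by the full element width, which is the pattern in the expected_min_sh1/expected_min_sh3/expected_min_shmax tables (assuming the usual arithmetic right shift for signed values). Illustrative arithmetic, not part of the patch:

#include <stdio.h>

/* Rounded arithmetic shifts of -128; ">>" on a negative value is
   assumed to behave as an arithmetic shift here, as on GCC targets.  */
int
main (void)
{
  const int shifts[] = { 1, 3, 8 };
  int x = -128;

  for (unsigned i = 0; i < 3; i++)
    {
      int n = shifts[i];
      printf ("n=%d -> 0x%02x\n", n, (unsigned char) ((x + (1 << (n - 1))) >> n));
    }
  return 0;
}
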
*/ ++ TEST_VRSRA_N(, int, s, 8, 8, 1); ++ TEST_VRSRA_N(, int, s, 16, 4, 1); ++ TEST_VRSRA_N(, int, s, 32, 2, 1); ++ TEST_VRSRA_N(, int, s, 64, 1, 1); ++ TEST_VRSRA_N(q, int, s, 8, 16, 1); ++ TEST_VRSRA_N(q, int, s, 16, 8, 1); ++ TEST_VRSRA_N(q, int, s, 32, 4, 1); ++ TEST_VRSRA_N(q, int, s, 64, 2, 1); + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++#undef CMT ++#define CMT " (checking overflow: shift by 1, min negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_min_sh1, CMT); ++ ++ ++ /* Shift by 3 to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 3); ++ TEST_VRSRA_N(, int, s, 16, 4, 3); ++ TEST_VRSRA_N(, int, s, 32, 2, 3); ++ TEST_VRSRA_N(, int, s, 64, 1, 3); ++ TEST_VRSRA_N(q, int, s, 8, 16, 3); ++ TEST_VRSRA_N(q, int, s, 16, 8, 3); ++ TEST_VRSRA_N(q, int, s, 32, 4, 3); ++ TEST_VRSRA_N(q, int, s, 64, 2, 3); ++ ++#undef CMT ++#define CMT " (checking overflow: shift by 3, min negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_min_sh3, CMT); ++ ++ ++ /* Shift by max to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 8); ++ TEST_VRSRA_N(, int, s, 16, 4, 16); ++ TEST_VRSRA_N(, int, s, 32, 2, 32); ++ TEST_VRSRA_N(, int, s, 64, 1, 64); ++ TEST_VRSRA_N(q, int, s, 8, 16, 8); ++ TEST_VRSRA_N(q, int, s, 16, 8, 16); ++ TEST_VRSRA_N(q, int, s, 32, 4, 32); ++ TEST_VRSRA_N(q, int, s, 64, 2, 64); + -+FNNAME (INSN) -+{ -+ /* Basic test: v3=vqrshl(v1,v2), then store the result. 
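
The vqrshl hunks in this part of the diff exercise the saturating, rounding shift left whose per-lane shift count is taken as a signed value from the second operand; negative counts shift right with rounding, and any saturation sets the cumulative QC flag that the Set_Neon_Cumulative_Sat/CHECK_CUMULATIVE_SAT pair reads back. A minimal sketch of the underlying intrinsic (illustrative only, not part of the patch):

#include <arm_neon.h>

/* Saturating rounding shift left: each lane of value is shifted by the
   corresponding signed lane of shift_count (right, with rounding, when
   the count is negative), saturating on overflow.  */
int8x8_t
qrshl_example (int8x8_t value, int8x8_t shift_count)
{
  return vqrshl_s8 (value, shift_count);
}
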
*/ -+#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector_shift, T3, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#undef CMT ++#define CMT " (checking overflow: shift by max, min negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_min_shmax, CMT); ++} + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++int main (void) ++{ ++ exec_vrsra_n (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc +@@ -68,7 +68,24 @@ void FNNAME (INSN_NAME) (void) + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 3); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 12); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c +@@ -0,0 +1,99 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+#define TEST_VQRSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Expected results. 
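
vset_lane/vset_laneq replace a single lane of a vector and leave the other lanes untouched, so the expected tables for the new vset_lane test are simply the reference buffer contents (0xf0, 0xf1, ...) with one lane overwritten by the constant passed to TEST_VSET_LANE; for example lane 7 of the int8x8 vector becomes 0x11. A minimal sketch of one variant (illustrative only, not part of the patch):

#include <arm_neon.h>

/* What one TEST_VSET_LANE instantiation does for the int8x8 case:
   overwrite lane 7 with 0x11 and store the vector back.  */
void
set_lane_example (int8_t *inout)
{
  int8x8_t v = vld1_s8 (inout);

  v = vset_lane_s8 (0x11, v, 7);
  vst1_s8 (inout, v);
}
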
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0x11 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x22 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0x44 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0x55, 0xf7 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x88 }; ++VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0x55, 0xf7 }; ++VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x4204cccd }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0x99 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xaa, 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xbb }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xcc }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xdd, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xee, 0xfff7 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xff, 0xfffffff3 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x11 }; ++VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xdd, 0xff }; ++VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xee, 0xfff7 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0x41333333 }; ++ ++#define TEST_MSG "VSET_LANE/VSET_LANEQ" ++void exec_vset_lane (void) ++{ ++ /* vec=vset_lane(val, vec, lane), then store the result. */ ++#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L) \ ++ VECT_VAR(vector, T1, W, N) = \ ++ vset##Q##_lane_##T2##W(V, \ ++ VECT_VAR(vector, T1, W, N), \ ++ L); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); -+ -+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + -+ /* Fill input vector with 0, to check saturation on limits. */ -+ VDUP(vector, , int, s, 8, 8, 0); -+ VDUP(vector, , int, s, 16, 4, 0); -+ VDUP(vector, , int, s, 32, 2, 0); -+ VDUP(vector, , int, s, 64, 1, 0); -+ VDUP(vector, , uint, u, 8, 8, 0); -+ VDUP(vector, , uint, u, 16, 4, 0); -+ VDUP(vector, , uint, u, 32, 2, 0); -+ VDUP(vector, , uint, u, 64, 1, 0); -+ VDUP(vector, q, int, s, 8, 16, 0); -+ VDUP(vector, q, int, s, 16, 8, 0); -+ VDUP(vector, q, int, s, 32, 4, 0); -+ VDUP(vector, q, int, s, 64, 2, 0); -+ VDUP(vector, q, uint, u, 8, 16, 0); -+ VDUP(vector, q, uint, u, 16, 8, 0); -+ VDUP(vector, q, uint, u, 32, 4, 0); -+ VDUP(vector, q, uint, u, 64, 2, 0); -+ -+ /* Choose init value arbitrarily, will be used as shift amount */ -+ /* Use values equal to or one-less-than the type width to check -+ behaviour on limits. 
*/ -+ VDUP(vector_shift, , int, s, 8, 8, 7); -+ VDUP(vector_shift, , int, s, 16, 4, 15); -+ VDUP(vector_shift, , int, s, 32, 2, 31); -+ VDUP(vector_shift, , int, s, 64, 1, 63); -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); + -+#define CMT " (with input = 0)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++ /* Choose value and lane arbitrarily. */ ++ TEST_VSET_LANE(, int, s, 8, 8, 0x11, 7); ++ TEST_VSET_LANE(, int, s, 16, 4, 0x22, 3); ++ TEST_VSET_LANE(, int, s, 32, 2, 0x33, 1); ++ TEST_VSET_LANE(, int, s, 64, 1, 0x44, 0); ++ TEST_VSET_LANE(, uint, u, 8, 8, 0x55, 6); ++ TEST_VSET_LANE(, uint, u, 16, 4, 0x66, 2); ++ TEST_VSET_LANE(, uint, u, 32, 2, 0x77, 1); ++ TEST_VSET_LANE(, uint, u, 64, 1, 0x88, 0); ++ TEST_VSET_LANE(, poly, p, 8, 8, 0x55, 6); ++ TEST_VSET_LANE(, poly, p, 16, 4, 0x66, 2); ++ TEST_VSET_LANE(, float, f, 32, 2, 33.2f, 1); ++ ++ TEST_VSET_LANE(q, int, s, 8, 16, 0x99, 15); ++ TEST_VSET_LANE(q, int, s, 16, 8, 0xAA, 5); ++ TEST_VSET_LANE(q, int, s, 32, 4, 0xBB, 3); ++ TEST_VSET_LANE(q, int, s, 64, 2, 0xCC, 1); ++ TEST_VSET_LANE(q, uint, u, 8, 16, 0xDD, 14); ++ TEST_VSET_LANE(q, uint, u, 16, 8, 0xEE, 6); ++ TEST_VSET_LANE(q, uint, u, 32, 4, 0xFF, 2); ++ TEST_VSET_LANE(q, uint, u, 64, 2, 0x11, 1); ++ TEST_VSET_LANE(q, poly, p, 8, 16, 0xDD, 14); ++ TEST_VSET_LANE(q, poly, p, 16, 8, 0xEE, 6); ++ TEST_VSET_LANE(q, float, f, 32, 4, 11.2f, 3); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 
16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ CHECK_RESULTS(TEST_MSG, ""); ++} + ++int main (void) ++{ ++ exec_vset_lane (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c +@@ -13,11 +13,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60, + 0x80, 0xa0, 0xc0, 0xe0, + 0x0, 0x20, 0x40, 0x60, +@@ -36,14 +31,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000, + 0x80000000, 0xc0000000 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with large shift amount. */ + VECT_VAR_DECL(expected_large_shift,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, +@@ -56,11 +43,6 @@ VECT_VAR_DECL(expected_large_shift,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected_large_shift,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,32,2) [] = { 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,64,1) [] = { 0x0 }; +-VECT_VAR_DECL(expected_large_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_large_shift,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_large_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_large_shift,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -77,16 +59,6 @@ VECT_VAR_DECL(expected_large_shift,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,64,2) [] = { 0x0, 0x0 }; +-VECT_VAR_DECL(expected_large_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_large_shift,poly,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_large_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + + /* Expected results with negative shift amount. 
*/ +@@ -103,12 +75,6 @@ VECT_VAR_DECL(expected_negative_shift,uint,16,4) [] = { 0x7ff8, 0x7ff8, + VECT_VAR_DECL(expected_negative_shift,uint,32,2) [] = { 0x3ffffffc, + 0x3ffffffc }; + VECT_VAR_DECL(expected_negative_shift,uint,64,1) [] = { 0xfffffffffffffff }; +-VECT_VAR_DECL(expected_negative_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_negative_shift,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_negative_shift,hfloat,32,2) [] = { 0x33333333, +- 0x33333333 }; + VECT_VAR_DECL(expected_negative_shift,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc, + 0xfd, 0xfd, 0xfd, 0xfd, + 0xfe, 0xfe, 0xfe, 0xfe, +@@ -133,18 +99,6 @@ VECT_VAR_DECL(expected_negative_shift,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe, + 0x1ffffffe, 0x1ffffffe }; + VECT_VAR_DECL(expected_negative_shift,uint,64,2) [] = { 0x7ffffffffffffff, + 0x7ffffffffffffff }; +-VECT_VAR_DECL(expected_negative_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_negative_shift,poly,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_negative_shift,hfloat,32,4) [] = { 0x33333333, +- 0x33333333, +- 0x33333333, +- 0x33333333 }; + + + #ifndef INSN_NAME +@@ -187,7 +141,22 @@ void FNNAME (INSN_NAME) (void) + /* Execute the tests. */ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + + /* Test large shift amount (larger or equal to the type width. */ +@@ -203,7 +172,23 @@ void FNNAME (INSN_NAME) (void) + /* Execute the tests. 
*/ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_large_shift, "(large shift amount)"); ++#define COMMENT1 "(large shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_shift, COMMENT1); + + + /* Test negative shift amount. */ +@@ -219,7 +204,23 @@ void FNNAME (INSN_NAME) (void) + /* Execute the tests. */ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_negative_shift, "(negative shift amount)"); ++#define COMMENT2 "(negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_negative_shift, COMMENT2); + } + + int main (void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl_n.c +@@ -0,0 +1,96 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Use negative shift amounts. */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffff80, 0xffffff88 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff00, 0xff10, 0xff20, 0xff30 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff80, 0xffffff88 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffe0 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60, ++ 0x80, 0xa0, 0xc0, 0xe0, ++ 0x0, 0x20, 0x40, 0x60, ++ 0x80, 0xa0, 0xc0, 0xe0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, ++ 0xffe8, 0xffea, 0xffec, 0xffee }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffc0, 0xffffffc4, ++ 0xffffffc8, 0xffffffcc }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc, ++ 0xe0, 0xe4, 0xe8, 0xec, ++ 0xf0, 0xf4, 0xf8, 0xfc }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff80, 0xff88, 0xff90, 0xff98, ++ 0xffa0, 0xffa8, 0xffb0, 0xffb8 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc4, ++ 0xffffffc8, 0xffffffcc }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffe0, ++ 0xffffffffffffffe2 }; + -+#undef CMT -+#define CMT " (input 0 and negative shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++#define TEST_MSG "VSHL_N" ++void exec_vshl_n (void) ++{ ++ /* Basic test: v2=vshl_n(v1,v), then store the result. 
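An illustrative sketch (the helper name is invented for this note): vshl_n
shifts every lane left by a compile-time immediate, so with the 0xfff0-style
inputs these tests load from "buffer", 0xfff0 << 1 gives the 0xffe0 that
opens expected,int,16,4 above.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: shift each 16-bit lane
     left by one, as TEST_VSHL_N(, int, s, 16, 4, 1) does below.  */
  int16x4_t
  shift_left_by_one (int16x4_t v)
  {
    return vshl_n_s16 (v, 1);   /* shift amount must be a constant */
  }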
*/ ++#define TEST_VSHL_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vshl##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ clean_results (); + -+ /* Test again, with predefined input values. */ ++ /* Initialize input "vector" from "buffer". */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+ /* Choose init value arbitrarily, will be used as shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 1); -+ VDUP(vector_shift, , int, s, 16, 4, 3); -+ VDUP(vector_shift, , int, s, 32, 2, 8); -+ VDUP(vector_shift, , int, s, 64, 1, 3); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 12); -+ VDUP(vector_shift, q, int, s, 32, 4, 31); -+ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ /* Choose shift amount arbitrarily. 
*/ ++ TEST_VSHL_N(, int, s, 8, 8, 1); ++ TEST_VSHL_N(, int, s, 16, 4, 1); ++ TEST_VSHL_N(, int, s, 32, 2, 3); ++ TEST_VSHL_N(, int, s, 64, 1, 2); ++ TEST_VSHL_N(, uint, u, 8, 8, 2); ++ TEST_VSHL_N(, uint, u, 16, 4, 4); ++ TEST_VSHL_N(, uint, u, 32, 2, 3); ++ TEST_VSHL_N(, uint, u, 64, 1, 1); ++ ++ TEST_VSHL_N(q, int, s, 8, 16, 5); ++ TEST_VSHL_N(q, int, s, 16, 8, 1); ++ TEST_VSHL_N(q, int, s, 32, 4, 2); ++ TEST_VSHL_N(q, int, s, 64, 2, 2); ++ TEST_VSHL_N(q, uint, u, 8, 16, 2); ++ TEST_VSHL_N(q, uint, u, 16, 8, 3); ++ TEST_VSHL_N(q, uint, u, 32, 4, 2); ++ TEST_VSHL_N(q, uint, u, 64, 2, 1); + -+#undef CMT +#define CMT "" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); -+ + CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); + CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); @@ -8848,2416 +27073,3619 @@ + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + ++int main (void) ++{ ++ exec_vshl_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshll_n.c +@@ -0,0 +1,56 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Use negative shift amounts. */ -+ VDUP(vector_shift, , int, s, 8, 8, -2); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++/* Expected results. 
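An illustrative sketch (helper invented for this note): vshll_n widens each
lane to twice its width before shifting, which is why the expected arrays
below use the double-width q-register types.  Assuming the usual 0xf0-based
inputs, the first int8 lane is 0xf0 (-16), which widens to 0xfff0 and shifts
to 0xffe0, the first value in expected,int,16,8.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: sign-extend each int8 lane
     to 16 bits and shift left by one, as TEST_VSHLL_N(int, s, 8, 16, 8, 1)
     does below.  */
  int16x8_t
  widen_and_shift (int8x8_t v)
  {
    return vshll_n_s8 (v, 1);
  }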
*/ ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, ++ 0xffe8, 0xffea, 0xffec, 0xffee }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, ++ 0xffffffe4, 0xffffffe6 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffff80, 0xffffffffffffff88 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3c0, 0x3c4, 0x3c8, 0x3cc, ++ 0x3d0, 0x3d4, 0x3d8, 0x3dc }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff00, 0xfff10, 0xfff20, 0xfff30 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffff80, 0x7ffffff88 }; + -+#undef CMT -+#define CMT " (negative shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++#define TEST_MSG "VSHLL_N" ++void exec_vshll_n (void) ++{ ++ /* Basic test: v2=vshll_n(v1,v), then store the result. */ ++#define TEST_VSHLL_N(T1, T2, W, W2, N, V) \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ vshll##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1q##_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ clean_results (); + -+ /* Fill input vector with max value, to check saturation on -+ limits. 
*/ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, , uint, u, 8, 8, 0xFF); -+ VDUP(vector, , uint, u, 16, 4, 0xFFFF); -+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); -+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 8, 16, 0xFF); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+ /* Use -1 shift amount to check cumulative saturation with -+ round_const. */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -1); -+ VDUP(vector_shift, , int, s, 32, 2, -1); -+ VDUP(vector_shift, , int, s, 64, 1, -1); -+ VDUP(vector_shift, q, int, s, 8, 16, -1); -+ VDUP(vector_shift, q, int, s, 16, 8, -1); -+ VDUP(vector_shift, q, int, s, 32, 4, -1); -+ VDUP(vector_shift, q, int, s, 64, 2, -1); ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSHLL_N(int, s, 8, 16, 8, 1); ++ TEST_VSHLL_N(int, s, 16, 32, 4, 1); ++ TEST_VSHLL_N(int, s, 32, 64, 2, 3); ++ TEST_VSHLL_N(uint, u, 8, 16, 8, 2); ++ TEST_VSHLL_N(uint, u, 16, 32, 4, 4); ++ TEST_VSHLL_N(uint, u, 32, 64, 2, 3); + +#undef CMT -+#define CMT " (checking cumulative saturation: shift by -1)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus1, CMT); -+ -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus1, CMT); -+ 
CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus1, CMT); ++#define CMT "" ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + ++int main (void) ++{ ++ exec_vshll_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshr_n.c +@@ -0,0 +1,95 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Use -3 shift amount to check cumulative saturation with -+ round_const. */ -+ VDUP(vector_shift, , int, s, 8, 8, -3); -+ VDUP(vector_shift, , int, s, 16, 4, -3); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -3); -+ VDUP(vector_shift, q, int, s, 8, 16, -3); -+ VDUP(vector_shift, q, int, s, 16, 8, -3); -+ VDUP(vector_shift, q, int, s, 32, 4, -3); -+ VDUP(vector_shift, q, int, s, 64, 2, -3); ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3c, 0x3c, ++ 0x3d, 0x3d, 0x3d, 0x3d }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7ffffff, 0x7ffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7fffffff }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb, ++ 0xfc, 0xfc, 0xfd, 0xfd, ++ 0xfe, 0xfe, 0xff, 0xff }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, ++ 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c, ++ 0x3d, 0x3d, 0x3d, 0x3d, ++ 0x3e, 0x3e, 0x3e, 0x3e, ++ 0x3f, 0x3f, 0x3f, 0x3f }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, ++ 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7ffffff, 0x7ffffff, ++ 0x7ffffff, 0x7ffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7fffffff, 0x7fffffff }; + -+#undef CMT -+#define CMT " (checking cumulative saturation: shift by -3)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, 
expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus3, CMT); ++#define TEST_MSG "VSHR_N" ++void exec_vshr_n (void) ++{ ++ /* Basic test: y=vshr_n(x,v), then store the result. */ ++#define TEST_VSHR_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus3, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ clean_results (); + -+ /* Use large shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 10); -+ VDUP(vector_shift, , int, s, 16, 4, 20); -+ VDUP(vector_shift, , int, s, 32, 2, 40); -+ VDUP(vector_shift, , int, s, 64, 1, 70); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 20); -+ VDUP(vector_shift, q, int, s, 32, 4, 40); -+ VDUP(vector_shift, q, int, s, 64, 2, 70); ++ /* Initialize input "vector" from "buffer". 
*/ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+#undef CMT -+#define CMT " (checking cumulative saturation: large shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_sh, CMT); ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSHR_N(, int, s, 8, 8, 1); ++ TEST_VSHR_N(, int, s, 16, 4, 12); ++ TEST_VSHR_N(, int, s, 32, 2, 2); ++ TEST_VSHR_N(, int, s, 64, 1, 32); ++ TEST_VSHR_N(, uint, u, 8, 8, 2); ++ TEST_VSHR_N(, uint, u, 16, 4, 3); ++ TEST_VSHR_N(, uint, u, 32, 2, 5); ++ TEST_VSHR_N(, uint, u, 64, 1, 33); ++ ++ TEST_VSHR_N(q, int, s, 8, 16, 1); ++ TEST_VSHR_N(q, int, s, 16, 8, 12); ++ TEST_VSHR_N(q, int, s, 32, 4, 2); ++ TEST_VSHR_N(q, int, s, 64, 2, 32); ++ TEST_VSHR_N(q, uint, u, 8, 16, 2); ++ TEST_VSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VSHR_N(q, uint, u, 32, 4, 5); ++ TEST_VSHR_N(q, uint, u, 64, 2, 33); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_sh, CMT); ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, 
expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + ++int main (void) ++{ ++ exec_vshr_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshrn_n.c +@@ -0,0 +1,70 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Fill input vector with negative values, to check saturation on -+ limits. */ -+ VDUP(vector, , int, s, 8, 8, 0x80); -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); -+ VDUP(vector, q, int, s, 8, 16, 0x80); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfc, 0xfc, ++ 0xfd, 0xfd, 0xfd, 0xfd }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; + -+ /* Use large shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 10); -+ VDUP(vector_shift, , int, s, 16, 4, 20); -+ VDUP(vector_shift, , int, s, 32, 2, 40); -+ VDUP(vector_shift, , int, s, 64, 1, 70); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 20); -+ VDUP(vector_shift, q, int, s, 32, 4, 40); -+ VDUP(vector_shift, q, int, s, 64, 2, 70); ++#define TEST_MSG "VSHRN_N" ++void exec_vshrn_n (void) ++{ ++ /* Basic test: y=vshrn_n(x,v), then store the result. 
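An illustrative sketch (helper invented for this note): vshrn_n shifts each
double-width lane right (arithmetically for the signed variants) and keeps
only the low half, so 0xfff0 >> 1 == 0xfff8 narrows to the 0xf8 that opens
expected,int,8,8 above.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: shift each 16-bit lane right
     by one and narrow to 8 bits, as TEST_VSHRN_N(int, s, 16, 8, 8, 1)
     does below.  */
  int8x8_t
  shift_right_and_narrow (int16x8_t v)
  {
    return vshrn_n_s16 (v, 1);
  }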
*/ ++#define TEST_VSHRN_N(T1, T2, W, W2, N, V) \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ vshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + -+#undef CMT -+#define CMT " (checking cumulative saturation: large shift amount with negative input)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ /* vector is twice as large as vector_res. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large_sh, CMT); ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); + ++ clean_results (); + -+ /* Fill input vector with negative and positive values, to check -+ * saturation on limits */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 
0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, int, s, 8, 16, 0x80); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); + -+ /* Use large negative shift amount */ -+ VDUP(vector_shift, , int, s, 8, 8, -10); -+ VDUP(vector_shift, , int, s, 16, 4, -20); -+ VDUP(vector_shift, , int, s, 32, 2, -40); -+ VDUP(vector_shift, , int, s, 64, 1, -70); -+ VDUP(vector_shift, q, int, s, 8, 16, -10); -+ VDUP(vector_shift, q, int, s, 16, 8, -20); -+ VDUP(vector_shift, q, int, s, 32, 4, -40); -+ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSHRN_N(int, s, 16, 8, 8, 1); ++ TEST_VSHRN_N(int, s, 32, 16, 4, 1); ++ TEST_VSHRN_N(int, s, 64, 32, 2, 2); ++ TEST_VSHRN_N(uint, u, 16, 8, 8, 2); ++ TEST_VSHRN_N(uint, u, 32, 16, 4, 3); ++ TEST_VSHRN_N(uint, u, 64, 32, 2, 3); + -+#undef CMT -+#define CMT " (checking cumulative saturation: large negative shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++} + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 
16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++int main (void) ++{ ++ exec_vshrn_n (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc +@@ -120,15 +120,40 @@ void FNNAME (INSN_NAME) (void) + TEST_EXTRA_CHUNK(poly, 16, 8, 1); \ + TEST_EXTRA_CHUNK(float, 32, 4, 1) + ++ /* vshuffle support all vector types except [u]int64x1 and ++ [u]int64x2. */ ++#define CHECK_RESULTS_VSHUFFLE(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ \ ++ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ ++ } \ ++ + clean_results (); + + /* Execute the tests. */ + TEST_ALL_VSHUFFLE(INSN_NAME); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "(chunk 0)"); ++ CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)"); + + TEST_ALL_EXTRA_CHUNKS(); +- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "(chunk 1)"); ++ CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)"); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c +@@ -23,7 +23,6 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x10 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0x50, 0x51, 0x52, 0x53, + 0x50, 0x51, 0x52, 0x53 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xd0, 0xd1, 0xd2, 0xd3, + 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, +@@ -48,8 +47,6 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67 }; + VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3, + 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with max shift amount. 
*/ + VECT_VAR_DECL(expected_max_shift,int,8,8) [] = { 0x70, 0x71, 0x72, 0x73, +@@ -68,7 +65,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,8) [] = { 0x70, 0x71, 0x72, 0x73, + 0x74, 0x75, 0x76, 0x77 }; + VECT_VAR_DECL(expected_max_shift,poly,16,4) [] = { 0x7ff0, 0x7ff1, + 0x7ff2, 0x7ff3 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_max_shift,int,8,16) [] = { 0x70, 0x71, 0x72, 0x73, + 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, +@@ -95,8 +91,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,16) [] = { 0x70, 0x71, 0x72, 0x73, + 0x7c, 0x7d, 0x7e, 0x7f }; + VECT_VAR_DECL(expected_max_shift,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vsXi_n.inc" + +@@ -158,5 +152,23 @@ void vsli_extra(void) + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 7); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 15); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_max_shift, "(max shift amount)"); ++#define COMMENT "(max shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); + } +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsra_n.c +@@ -0,0 +1,117 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff3, 0xfffffff4 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff, ++ 0x0, 0x1, 0x2, 0x3, ++ 0x4, 0x5, 0x6, 0x7 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffd, ++ 0xfffffffe, 0xffffffff }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc, ++ 0xd, 0xe, 0xf, 0x10, ++ 0x11, 0x12, 0x13, 0x14 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, ++ 0x0, 0x1, 0x2, 0x3 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0xfffffff4, ++ 0xfffffff5, 0xfffffff6 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; + -+ /* Fill input vector with 0, to check saturation in case of large -+ * shift amount */ -+ VDUP(vector, , int, s, 8, 8, 0); -+ VDUP(vector, , int, s, 16, 4, 0); -+ VDUP(vector, , int, s, 32, 2, 0); -+ VDUP(vector, , int, s, 64, 1, 0); -+ VDUP(vector, q, int, s, 8, 16, 0); -+ VDUP(vector, q, int, s, 16, 8, 0); -+ VDUP(vector, q, int, s, 32, 4, 0); -+ VDUP(vector, q, int, s, 64, 2, 0); ++#define TEST_MSG "VSRA_N" ++void exec_vsra_n (void) ++{ ++ /* Basic test: y=vsra_n(x,v), then store the result. 
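An illustrative sketch (helper invented for this note): vsra_n adds the
right-shifted second operand to the first, so lane i of the result is
x[i] + (y[i] >> n).  With x[0] == 0xf0 (-16 as int8), y filled with 0x11
and n == 1, that is -16 + 8 == -8 == 0xf8, the first value in
expected,int,8,8 above.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: accumulate the one-bit
     arithmetic right shift of y into x, per lane, as
     TEST_VSRA_N(, int, s, 8, 8, 1) does below.  */
  int8x8_t
  shift_right_and_accumulate (int8x8_t x, int8x8_t y)
  {
    return vsra_n_s8 (x, y, 1);
  }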
*/ ++#define TEST_VSRA_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+ /* Use large shift amount */ -+ VDUP(vector_shift, , int, s, 8, 8, -10); -+ VDUP(vector_shift, , int, s, 16, 4, -20); -+ VDUP(vector_shift, , int, s, 32, 2, -40); -+ VDUP(vector_shift, , int, s, 64, 1, -70); -+ VDUP(vector_shift, q, int, s, 8, 16, -10); -+ VDUP(vector_shift, q, int, s, 16, 8, -20); -+ VDUP(vector_shift, q, int, s, 32, 4, -40); -+ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector2); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+#undef CMT -+#define CMT " (checking cumulative saturation: large negative shift amount with 0 input)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ clean_results (); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose arbitrary initialization values. 
*/ ++ VDUP(vector2, , int, s, 8, 8, 0x11); ++ VDUP(vector2, , int, s, 16, 4, 0x22); ++ VDUP(vector2, , int, s, 32, 2, 0x33); ++ VDUP(vector2, , int, s, 64, 1, 0x44); ++ VDUP(vector2, , uint, u, 8, 8, 0x55); ++ VDUP(vector2, , uint, u, 16, 4, 0x66); ++ VDUP(vector2, , uint, u, 32, 2, 0x77); ++ VDUP(vector2, , uint, u, 64, 1, 0x88); ++ ++ VDUP(vector2, q, int, s, 8, 16, 0x11); ++ VDUP(vector2, q, int, s, 16, 8, 0x22); ++ VDUP(vector2, q, int, s, 32, 4, 0x33); ++ VDUP(vector2, q, int, s, 64, 2, 0x44); ++ VDUP(vector2, q, uint, u, 8, 16, 0x55); ++ VDUP(vector2, q, uint, u, 16, 8, 0x66); ++ VDUP(vector2, q, uint, u, 32, 4, 0x77); ++ VDUP(vector2, q, uint, u, 64, 2, 0x88); ++ ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSRA_N(, int, s, 8, 8, 1); ++ TEST_VSRA_N(, int, s, 16, 4, 12); ++ TEST_VSRA_N(, int, s, 32, 2, 2); ++ TEST_VSRA_N(, int, s, 64, 1, 32); ++ TEST_VSRA_N(, uint, u, 8, 8, 2); ++ TEST_VSRA_N(, uint, u, 16, 4, 3); ++ TEST_VSRA_N(, uint, u, 32, 2, 5); ++ TEST_VSRA_N(, uint, u, 64, 1, 33); ++ ++ TEST_VSRA_N(q, int, s, 8, 16, 1); ++ TEST_VSRA_N(q, int, s, 16, 8, 12); ++ TEST_VSRA_N(q, int, s, 32, 4, 2); ++ TEST_VSRA_N(q, int, s, 64, 2, 32); ++ TEST_VSRA_N(q, uint, u, 8, 16, 2); ++ TEST_VSRA_N(q, uint, u, 16, 8, 3); ++ TEST_VSRA_N(q, uint, u, 32, 4, 5); ++ TEST_VSRA_N(q, uint, u, 64, 2, 33); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); +} + +int main (void) +{ -+ exec_vqrshl (); ++ exec_vsra_n (); + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c +@@ -23,7 +23,6 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xe000000000000000 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5, + 0xc5, 0xc5, 0xc5, 0xc5 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf7, 0xf7, 0xf7, 0xf7, + 0xf7, 0xf7, 0xf7, 0xf7, + 0xff, 0xff, 0xff, 0xff, +@@ -50,8 +49,6 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1, + 0xe1, 0xe1, 0xe1, 0xe1 }; + VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, + 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with max shift amount. 
*/ + VECT_VAR_DECL(expected_max_shift,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -70,7 +67,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected_max_shift,poly,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff3 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_max_shift,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -97,8 +93,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xfc, 0xfd, 0xfe, 0xff }; + VECT_VAR_DECL(expected_max_shift,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vsXi_n.inc" + +@@ -160,5 +154,23 @@ void vsri_extra(void) + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 8); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 16); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_max_shift, "(max shift amount)"); ++#define COMMENT "(max shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); + } --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_n.c -@@ -0,0 +1,174 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c +@@ -0,0 +1,93 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; -+ +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, -+ 0xfa, 0xfb, 0xfb, 0xfc }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x33333333 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x33333333 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff5, 0x3333, 0x3333, 0x3333, ++ 0x3333, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0x33333333, ++ 0x33333333, 0x33333333 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x3333333333333333 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, ++ 0x3333, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0x33333333, ++ 0x33333333, 0x33333333 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0x3333333333333333 }; ++VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, ++ 0x3333, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333, ++ 0x33333333, 0x33333333 }; ++ ++#define TEST_MSG "VST1_LANE/VST1_LANEQ" ++void exec_vst1_lane (void) ++{ ++#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \ ++ VECT_VAR(vector, T1, W, N) = \ ++ vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ ++ vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector, T1, W, N), L) + -+/* Expected values of cumulative_saturation flag with shift by 3. */ -+int VECT_VAR(expected_cumulative_sat_sh3,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,uint,64,2) = 1; ++ DECL_VARIABLE_ALL_VARIANTS(vector); + -+/* Expected results with shift by 3. 
*/ -+VECT_VAR_DECL(expected_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_sh3,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ clean_results (); + -+/* Expected values of cumulative_saturation flag with shift by max -+ amount. */ -+int VECT_VAR(expected_cumulative_sat_shmax,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,uint,64,2) = 1; ++ /* Choose lane arbitrarily. */ ++ TEST_VST1_LANE(, int, s, 8, 8, 7); ++ TEST_VST1_LANE(, int, s, 16, 4, 3); ++ TEST_VST1_LANE(, int, s, 32, 2, 1); ++ TEST_VST1_LANE(, int, s, 64, 1, 0); ++ TEST_VST1_LANE(, uint, u, 8, 8, 6); ++ TEST_VST1_LANE(, uint, u, 16, 4, 2); ++ TEST_VST1_LANE(, uint, u, 32, 2, 0); ++ TEST_VST1_LANE(, uint, u, 64, 1, 0); ++ TEST_VST1_LANE(, poly, p, 8, 8, 6); ++ TEST_VST1_LANE(, poly, p, 16, 4, 2); ++ TEST_VST1_LANE(, float, f, 32, 2, 1); ++ ++ TEST_VST1_LANE(q, int, s, 8, 16, 15); ++ TEST_VST1_LANE(q, int, s, 16, 8, 5); ++ TEST_VST1_LANE(q, int, s, 32, 4, 1); ++ TEST_VST1_LANE(q, int, s, 64, 2, 1); ++ TEST_VST1_LANE(q, uint, u, 8, 16, 10); ++ TEST_VST1_LANE(q, uint, u, 16, 8, 4); ++ TEST_VST1_LANE(q, uint, u, 32, 4, 3); ++ TEST_VST1_LANE(q, uint, u, 64, 2, 0); ++ TEST_VST1_LANE(q, poly, p, 8, 16, 10); ++ TEST_VST1_LANE(q, poly, p, 16, 8, 4); ++ TEST_VST1_LANE(q, float, f, 32, 4, 1); + -+/* Expected results with shift by max amount. 
*/ -+VECT_VAR_DECL(expected_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_shmax,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_shmax,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ CHECK_RESULTS(TEST_MSG, ""); ++} + -+#define INSN vqrshrn_n -+#define TEST_MSG "VQRSHRN_N" ++int main (void) ++{ ++ exec_vst1_lane (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst2_lane_f32 (float32_t * p, float32x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst2_lane_f64 (float64_t * p, float64x1x2_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_p8_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+FNNAME (INSN) ++void ++f_vst2_lane_p8 (poly8_t * p, poly8x8x2_t v) +{ -+ /* Basic test: y=vqrshrn_n(x,v), then store the result. 
*/ -+#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ -+ VECT_VAR(vector_res, T1, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ -+ VECT_VAR(vector_res, T1, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++void ++f_vst2_lane_s16 (int16_t * p, int16x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* vector is twice as large as vector_res. 
*/ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); -+ DECL_VARIABLE(vector, uint, 16, 8); -+ DECL_VARIABLE(vector, uint, 32, 4); -+ DECL_VARIABLE(vector, uint, 64, 2); ++void ++f_vst2_lane_s32 (int32_t * p, int32x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ DECL_VARIABLE(vector_res, int, 8, 8); -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ clean_results (); ++void ++f_vst2_lane_s64 (int64_t * p, int64x1x2_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); -+ VLOAD(vector, buffer, q, int, s, 64, 2); -+ VLOAD(vector, buffer, q, uint, u, 16, 8); -+ VLOAD(vector, buffer, q, uint, u, 32, 4); -+ VLOAD(vector, buffer, q, uint, u, 64, 2); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Choose shift amount arbitrarily. 
*/ -+#define CMT "" -+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); ++void ++f_vst2_lane_s8 (int8_t * p, int8x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst2_lane_u16 (uint16_t * p, uint16x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Another set of tests, shifting max value by 3. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#undef CMT -+#define CMT " (check saturation: shift by 3)" -+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); ++void ++f_vst2_lane_u32 (uint32_t * p, uint32x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh3, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh3, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh3, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh3, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh3, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh3, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + ++void ++f_vst2_lane_u64 (uint64_t * p, uint64x1x2_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Shift by max amount. 
*/ -+#undef CMT -+#define CMT " (check saturation: shift by max)" -+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_shmax, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_shmax, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_shmax, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_shmax, CMT); ++void ++f_vst2_lane_u8 (uint8_t * p, uint8x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u8 (p, v, -1); ++ return; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+int main (void) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst2q_lane_f32 (float32_t * p, float32x4x2_t v) +{ -+ exec_vqrshrn_n (); -+ return 0; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f32 (p, v, -1); ++ return; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_n.c -@@ -0,0 +1,189 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ +#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected values of cumulative_saturation flag with negative unput. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; + -+/* Expected results with negative input. */ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by 1. 
*/ -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++void ++f_vst2q_lane_f64 (float64_t * p, float64x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected results with max input value shifted by 1. */ -+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by max amount. */ -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; ++void ++f_vst2q_lane_p8 (poly8_t * p, poly8x16x2_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected results with max input value shifted by max amount. */ -+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with min input value -+ shifted by max amount. */ -+int VECT_VAR(expected_cumulative_sat_min_shmax,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_min_shmax,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_min_shmax,int,64,2) = 1; ++void ++f_vst2q_lane_s16 (int16_t * p, int16x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected results with min input value shifted by max amount. 
*/ -+VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x0, 0x0 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with inputs in usual -+ range. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++void ++f_vst2q_lane_s32 (int32_t * p, int32x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected results with inputs in usual range. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49, -+ 0x49, 0x49, 0x49, 0x49 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#define INSN vqrshrun_n -+#define TEST_MSG "VQRSHRUN_N" ++void ++f_vst2q_lane_s64 (int64_t * p, int64x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+FNNAME (INSN) ++void ++f_vst2q_lane_s8 (int8_t * p, int8x16x2_t v) +{ -+ /* Basic test: y=vqrshrun_n(x,v), then store the result. 
*/ -+#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ -+ VECT_VAR(vector_res, uint, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ -+ VECT_VAR(vector_res, uint, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++void ++f_vst2q_lane_u16 (uint16_t * p, uint16x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* vector is twice as large as vector_res. */ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); ++void ++f_vst2q_lane_u32 (uint32_t * p, uint32x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ clean_results (); ++void ++f_vst2q_lane_u64 (uint64_t * p, uint64x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Fill input vector with negative values, to check saturation on -+ limits. 
*/ -+ VDUP(vector, q, int, s, 16, 8, -2); -+ VDUP(vector, q, int, s, 32, 4, -3); -+ VDUP(vector, q, int, s, 64, 2, -4); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* Choose shift amount arbitrarily. */ -+#define CMT " (negative input)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); ++void ++f_vst2q_lane_u8 (uint8_t * p, uint8x16x2_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst3_lane_f32 (float32_t * p, float32x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Fill input vector with max value, to check saturation on -+ limits. */ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* shift by 1. 
*/ -+#undef CMT -+#define CMT " (check cumulative saturation: shift by 1)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT); ++void ++f_vst3_lane_f64 (float64_t * p, float64x1x3_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_p8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst3_lane_p8 (poly8_t * p, poly8x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* shift by max. */ -+#undef CMT -+#define CMT " (check cumulative saturation: shift by max, positive input)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); ++void ++f_vst3_lane_s16 (int16_t * p, int16x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Fill input vector with min value, to check saturation on limits. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++void ++f_vst3_lane_s32 (int32_t * p, int32x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* shift by max */ -+#undef CMT -+#define CMT " (check cumulative saturation: shift by max, negative input)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_min_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_min_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_min_shmax, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT); ++void ++f_vst3_lane_s64 (int64_t * p, int64x1x3_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Fill input vector with positive values, to check normal case. */ -+ VDUP(vector, q, int, s, 16, 8, 0x1234); -+ VDUP(vector, q, int, s, 32, 4, 0x87654321); -+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); ++void ++f_vst3_lane_s8 (int8_t * p, int8x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* shift arbitrary amount. 
*/ -+#undef CMT -+#define CMT "" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++void ++f_vst3_lane_u16 (uint16_t * p, uint16x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u16 (p, v, -1); ++ return; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+int main (void) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst3_lane_u32 (uint32_t * p, uint32x2x3_t v) +{ -+ exec_vqrshrun_n (); -+ return 0; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u32 (p, v, -1); ++ return; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl.c -@@ -0,0 +1,829 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ +#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag with input=0. */ -+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with input=0. 
*/ -+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++void ++f_vst3_lane_u64 (uint64_t * p, uint64x1x3_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_saturation flag with input=0 and -+ negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results with input=0 and negative shift amount. 
*/ -+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++void ++f_vst3_lane_u8 (uint8_t * p, uint8x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, -+ 0xe8, 0xea, 0xec, 0xee }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; -+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, -+ 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++void ++f_vst3q_lane_f32 (float32_t * p, float32x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with negative shift -+ amount. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with negative shift amount. 
*/ -+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, -+ 0xfa, 0xfa, 0xfb, 0xfb }; -+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc }; -+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, -+ 0x7a, 0x7a, 0x7b, 0x7b }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; -+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; -+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, -+ 0x1, 0x1, 0x1, 0x1, -+ 0x1, 0x1, 0x1, 0x1, -+ 0x1, 0x1, 0x1, 0x1 }; -+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f, -+ 0x1f, 0x1f, 0x1f, 0x1f }; -+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x7ffff, 0x7ffff, -+ 0x7ffff, 0x7ffff }; -+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff }; ++void ++f_vst3q_lane_f64 (float64_t * p, float64x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with negative -+ input and large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with negative input and large shift amount. 
*/ -+VECT_VAR_DECL(expected_neg_large,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large,int,16,4) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large,int,32,2) [] = { 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large,int,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg_large,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large,int,16,8) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large,int,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++void ++f_vst3q_lane_p8 (poly8_t * p, poly8x16x3_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst3q_lane_s16 (int16_t * p, int16x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst3q_lane_s32 (int32_t * p, int32x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst3q_lane_s64 (int64_t * p, int64x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ 
++ vst3q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with max input -+ and shift by -1. */ -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with max input and shift by -1. */ -+VECT_VAR_DECL(expected_max_minus1,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f }; -+VECT_VAR_DECL(expected_max_minus1,int,16,4) [] = { 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff }; -+VECT_VAR_DECL(expected_max_minus1,int,32,2) [] = { 0x3fffffff, 0x3fffffff }; -+VECT_VAR_DECL(expected_max_minus1,int,64,1) [] = { 0x3fffffffffffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_minus1,uint,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_minus1,uint,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max_minus1,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f }; -+VECT_VAR_DECL(expected_max_minus1,int,16,8) [] = { 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff }; -+VECT_VAR_DECL(expected_max_minus1,int,32,4) [] = { 0x3fffffff, 0x3fffffff, -+ 0x3fffffff, 0x3fffffff }; -+VECT_VAR_DECL(expected_max_minus1,int,64,2) [] = { 0x3fffffffffffffff, -+ 0x3fffffffffffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_minus1,uint,16,8) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_minus1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; ++void ++f_vst3q_lane_s8 (int8_t * p, int8x16x3_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of 
range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with max input -+ and large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_max_large,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results with max input and large shift amount. */ -+VECT_VAR_DECL(expected_max_large,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_large,int,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_large,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_large,int,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_large,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_max_large,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_large,int,16,8) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_large,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_large,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_large,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++void ++f_vst3q_lane_u16 (uint16_t * p, uint16x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ 
b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with saturation -+ on 64-bits values. */ -+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results with saturation on 64-bits values.. */ -+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; ++void ++f_vst3q_lane_u32 (uint32_t * p, uint32x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+#define INSN vqshl -+#define TEST_MSG "VQSHL/VQSHLQ" ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++void ++f_vst3q_lane_u64 (uint64_t * p, uint64x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+FNNAME (INSN) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst3q_lane_u8 (uint8_t * p, uint8x16x3_t v) +{ -+ /* Basic test: v3=vqshl(v1,v2), then store the result. 
*/ -+#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector_shift, T3, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#define TEST_VQSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++void ++f_vst4_lane_f32 (float32_t * p, float32x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++void ++f_vst4_lane_f64 (float64_t * p, float64x1x4_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_p8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ clean_results (); ++void ++f_vst4_lane_p8 (poly8_t * p, poly8x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Fill input vector with 0, to check saturation on limits. 
*/ -+ VDUP(vector, , int, s, 8, 8, 0); -+ VDUP(vector, , int, s, 16, 4, 0); -+ VDUP(vector, , int, s, 32, 2, 0); -+ VDUP(vector, , int, s, 64, 1, 0); -+ VDUP(vector, , uint, u, 8, 8, 0); -+ VDUP(vector, , uint, u, 16, 4, 0); -+ VDUP(vector, , uint, u, 32, 2, 0); -+ VDUP(vector, , uint, u, 64, 1, 0); -+ VDUP(vector, q, int, s, 8, 16, 0); -+ VDUP(vector, q, int, s, 16, 8, 0); -+ VDUP(vector, q, int, s, 32, 4, 0); -+ VDUP(vector, q, int, s, 64, 2, 0); -+ VDUP(vector, q, uint, u, 8, 16, 0); -+ VDUP(vector, q, uint, u, 16, 8, 0); -+ VDUP(vector, q, uint, u, 32, 4, 0); -+ VDUP(vector, q, uint, u, 64, 2, 0); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst4_lane_s16 (int16_t * p, int16x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst4_lane_s32 (int32_t * p, int32x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst4_lane_s64 (int64_t * p, int64x1x4_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Choose init value arbitrarily, will be used as shift amount */ -+ /* Use values equal or one-less-than the type width to check -+ behaviour on limits. */ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* 64-bits vectors first. */ -+ /* Shift 8-bits lanes by 7... */ -+ VDUP(vector_shift, , int, s, 8, 8, 7); -+ /* ... except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */ -+ VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6); -+ VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8); -+ VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9); ++void ++f_vst4_lane_s8 (int8_t * p, int8x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Shift 16-bits lanes by 15... */ -+ VDUP(vector_shift, , int, s, 16, 4, 15); -+ /* ... except: lane 0 (by 14), lane 1 (by 16), and lane 2 (by 17). 
*/ -+ VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14); -+ VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16); -+ VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Shift 32-bits lanes by 31... */ -+ VDUP(vector_shift, , int, s, 32, 2, 31); -+ /* ... except lane 1 (by 30). */ -+ VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30); ++void ++f_vst4_lane_u16 (uint16_t * p, uint16x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Shift 64 bits lane by 63. */ -+ VDUP(vector_shift, , int, s, 64, 1, 63); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* 128-bits vectors. */ -+ /* Shift 8-bits lanes by 8. */ -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ /* Shift 16-bits lanes by 16. */ -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ /* Shift 32-bits lanes by 32... */ -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ /* ... except lane 1 (by 33). */ -+ VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33); ++void ++f_vst4_lane_u32 (uint32_t * p, uint32x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Shift 64-bits lanes by 64... */ -+ VDUP(vector_shift, q, int, s, 64, 2, 64); -+ /* ... except lane 1 (by 62). 
*/ -+ VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#define CMT " (with input = 0)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++void ++f_vst4_lane_u64 (uint64_t * p, uint64x1x4_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst4_lane_u8 (uint8_t * p, uint8x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Use negative shift amounts */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ 
VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#undef CMT -+#define CMT " (input 0 and negative shift amount)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++void ++f_vst4q_lane_f32 (float32_t * p, float32x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* Test again, with predefined input values. 
*/ -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++void ++f_vst4q_lane_f64 (float64_t * p, float64x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Choose init value arbitrarily, will be used as shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 1); -+ VDUP(vector_shift, , int, s, 16, 4, 3); -+ VDUP(vector_shift, , int, s, 32, 2, 8); -+ VDUP(vector_shift, , int, s, 64, 1, -3); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 12); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 63); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#undef CMT -+#define CMT "" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); ++void ++f_vst4q_lane_p8 (poly8_t * p, poly8x16x4_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++/* { dg-do compile } */ ++/* { 
dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst4q_lane_s16 (int16_t * p, int16x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Use negative shift amounts */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++void ++f_vst4q_lane_s32 (int32_t * p, int32x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst4q_lane_s64 (int64_t * p, int64x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+#undef CMT -+#define CMT " (negative shift amount)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); -+ 
CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++void ++f_vst4q_lane_s8 (int8_t * p, int8x16x4_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Use large shift amounts. */ -+ VDUP(vector_shift, , int, s, 8, 8, 8); -+ VDUP(vector_shift, , int, s, 16, 4, 16); -+ VDUP(vector_shift, , int, s, 32, 2, 32); -+ VDUP(vector_shift, , int, s, 64, 1, 64); -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++void ++f_vst4q_lane_u16 (uint16_t * p, uint16x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+#undef CMT -+#define CMT " (large shift amount, negative input)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, 
expected_cumulative_sat_neg_large, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large, CMT); ++void ++f_vst4q_lane_u32 (uint32_t * p, uint32x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* Fill input vector with max value, to check saturation on limits */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, , uint, u, 8, 8, 0xFF); -+ VDUP(vector, , uint, u, 16, 4, 0xFFFF); -+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); -+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 8, 16, 0xFF); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++void ++f_vst4q_lane_u64 (uint64_t * p, uint64x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Shift by -1 */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -1); -+ VDUP(vector_shift, , int, s, 32, 2, -1); -+ VDUP(vector_shift, , int, s, 64, 1, -1); -+ VDUP(vector_shift, q, int, s, 8, 16, -1); -+ VDUP(vector_shift, q, int, s, 16, 8, -1); -+ VDUP(vector_shift, q, int, s, 32, 4, -1); -+ VDUP(vector_shift, q, int, s, 64, 2, -1); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { 
dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#undef CMT -+#define CMT " (max input, shift by -1)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_minus1, CMT); ++void ++f_vst4q_lane_u8 (uint8_t * p, uint8x16x4_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c +@@ -0,0 +1,578 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_minus1, CMT); ++/* Expected results for vst2, chunk 0. 
*/ ++VECT_VAR_DECL(expected_st2_0,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st2_0,uint,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0x0, 0x0 }; + ++/* Expected results for vst2, chunk 1. */ ++VECT_VAR_DECL(expected_st2_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ /* Use large shift amounts */ -+ VDUP(vector_shift, , int, s, 8, 8, 8); -+ VDUP(vector_shift, , int, s, 16, 4, 16); -+ VDUP(vector_shift, , int, s, 32, 2, 32); -+ VDUP(vector_shift, , int, s, 64, 1, 64); -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++/* Expected results for vst3, chunk 0. 
*/ ++VECT_VAR_DECL(expected_st3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0x0 }; + -+#undef CMT -+#define CMT " (max input, large shift amount)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_large, CMT); ++/* Expected results for vst3, chunk 1. 
*/ ++VECT_VAR_DECL(expected_st3_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,32,2) [] = { 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_large, CMT); ++/* Expected results for vst3, chunk 2. */ ++VECT_VAR_DECL(expected_st3_2,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results for vst4, chunk 0. 
*/ ++VECT_VAR_DECL(expected_st4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_st4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0xc1500000 }; + ++/* Expected results for vst4, chunk 1. */ ++VECT_VAR_DECL(expected_st4_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; ++VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ /* Check 64 bits saturation. */ -+ VDUP(vector, , int, s, 64, 1, -10); -+ VDUP(vector_shift, , int, s, 64, 1, 64); -+ VDUP(vector, q, int, s, 64, 2, 10); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++/* Expected results for vst4, chunk 2. 
*/ ++VECT_VAR_DECL(expected_st4_2,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+#undef CMT -+#define CMT " (check saturation on 64 bits)" -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_64, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_64, CMT); ++/* Expected results for vst4, chunk 3. */ ++VECT_VAR_DECL(expected_st4_3,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, CMT); -+} ++/* Declare additional input buffers as needed. */ ++/* Input buffers for vld2_lane. */ ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2); ++ ++/* Input buffers for vld3_lane. 
*/ ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3); ++ ++/* Input buffers for vld4_lane. */ ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4); ++ ++void exec_vstX_lane (void) ++{ ++ /* In this case, input variables are arrays of vectors. */ ++#define DECL_VSTX_LANE(T1, W, N, X) \ ++ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ ++ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ ++ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] ++ ++ /* We need to use a temporary result buffer (result_bis), because ++ the one used for other tests is not large enough. A subset of the ++ result data is moved from result_bis to result, and it is this ++ subset which is used to check the actual behaviour. The next ++ macro enables to move another chunk of data from result_bis to ++ result. */ ++ /* We also use another extra input buffer (buffer_src), which we ++ fill with 0xAA, and which it used to load a vector from which we ++ read a given lane. */ ++#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \ ++ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ ++ sizeof(VECT_VAR(buffer_src, T1, W, N))); \ ++ memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \ ++ sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \ ++ \ ++ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ ++ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ ++ \ ++ VECT_ARRAY_VAR(vector, T1, W, N, X) = \ ++ /* Use dedicated init buffer, of size X. */ \ ++ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ ++ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ ++ L); \ ++ vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ ++ VECT_ARRAY_VAR(vector, T1, W, N, X), \ ++ L); \ ++ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ ++ sizeof(VECT_VAR(result, T1, W, N))); ++ ++ /* Overwrite "result" with the contents of "result_bis"[Y]. */ ++#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ ++ memcpy(VECT_VAR(result, T1, W, N), \ ++ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ ++ sizeof(VECT_VAR(result, T1, W, N))); ++ ++ /* We need all variants in 64 bits, but there is no 64x2 variant, ++ nor 128 bits vectors of int8/uint8/poly8. 
*/ ++#define DECL_ALL_VSTX_LANE(X) \ ++ DECL_VSTX_LANE(int, 8, 8, X); \ ++ DECL_VSTX_LANE(int, 16, 4, X); \ ++ DECL_VSTX_LANE(int, 32, 2, X); \ ++ DECL_VSTX_LANE(uint, 8, 8, X); \ ++ DECL_VSTX_LANE(uint, 16, 4, X); \ ++ DECL_VSTX_LANE(uint, 32, 2, X); \ ++ DECL_VSTX_LANE(poly, 8, 8, X); \ ++ DECL_VSTX_LANE(poly, 16, 4, X); \ ++ DECL_VSTX_LANE(float, 32, 2, X); \ ++ DECL_VSTX_LANE(int, 16, 8, X); \ ++ DECL_VSTX_LANE(int, 32, 4, X); \ ++ DECL_VSTX_LANE(uint, 16, 8, X); \ ++ DECL_VSTX_LANE(uint, 32, 4, X); \ ++ DECL_VSTX_LANE(poly, 16, 8, X); \ ++ DECL_VSTX_LANE(float, 32, 4, X) ++ ++#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] ++ ++ /* Use the same lanes regardless of the size of the array (X), for ++ simplicity. */ ++#define TEST_ALL_VSTX_LANE(X) \ ++ TEST_VSTX_LANE(, int, s, 8, 8, X, 7); \ ++ TEST_VSTX_LANE(, int, s, 16, 4, X, 2); \ ++ TEST_VSTX_LANE(, int, s, 32, 2, X, 0); \ ++ TEST_VSTX_LANE(, float, f, 32, 2, X, 0); \ ++ TEST_VSTX_LANE(, uint, u, 8, 8, X, 4); \ ++ TEST_VSTX_LANE(, uint, u, 16, 4, X, 3); \ ++ TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \ ++ TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \ ++ TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \ ++ TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \ ++ TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \ ++ TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \ ++ TEST_VSTX_LANE(q, uint, u, 32, 4, X, 0); \ ++ TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \ ++ TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) ++ ++#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ ++ TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ ++ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) ++ ++ /* Declare the temporary buffers / variables. */ ++ DECL_ALL_VSTX_LANE(2); ++ DECL_ALL_VSTX_LANE(3); ++ DECL_ALL_VSTX_LANE(4); ++ ++ /* Define dummy input arrays, large enough for x4 vectors. */ ++ DUMMY_ARRAY(buffer_src, int, 8, 8, 4); ++ DUMMY_ARRAY(buffer_src, int, 16, 4, 4); ++ DUMMY_ARRAY(buffer_src, int, 32, 2, 4); ++ DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); ++ DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); ++ DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); ++ DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); ++ DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); ++ DUMMY_ARRAY(buffer_src, float, 32, 2, 4); ++ DUMMY_ARRAY(buffer_src, int, 16, 8, 4); ++ DUMMY_ARRAY(buffer_src, int, 32, 4, 4); ++ DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); ++ DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); ++ DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); ++ DUMMY_ARRAY(buffer_src, float, 32, 4, 4); + -+int main (void) -+{ -+ exec_vqshl (); -+ return 0; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl_n.c -@@ -0,0 +1,234 @@ -+#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" ++ /* Check vst2_lane/vst2q_lane. */ ++ clean_results (); ++#define TEST_MSG "VST2_LANE/VST2Q_LANE" ++ TEST_ALL_VSTX_LANE(2); + -+/* Expected values of cumulative_saturation flag. 
*/ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++#define CMT " (chunk 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_0, CMT); + -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, -+ 0xd0, 0xd4, 0xd8, 0xdc }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, -+ 0xd0, 0xd4, 0xd8, 0xdc, -+ 0xe0, 0xe4, 0xe8, 0xec, -+ 0xf0, 0xf4, 0xf8, 0xfc }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, -+ 0xffe8, 0xffea, 0xffec, 0xffee }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, -+ 0xffffffe4, 0xffffffe6 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ TEST_ALL_EXTRA_CHUNKS(2, 1); ++#undef CMT ++#define CMT " chunk 1" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_1, CMT); + -+/* Expected values of cumulative_saturation flag with max positive input. 
*/ -+int VECT_VAR(expected_cumulative_sat_max,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,64,2) = 1; + -+/* Expected results with max positive input. */ -+VECT_VAR_DECL(expected_max,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max,int,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_max,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ /* Check vst3_lane/vst3q_lane. 
*/ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VST3_LANE/VST3Q_LANE" ++ TEST_ALL_VSTX_LANE(3); + -+#define INSN vqshl -+#define TEST_MSG "VQSHL_N/VQSHLQ_N" ++#undef CMT ++#define CMT " (chunk 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_0, CMT); + -+#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ TEST_ALL_EXTRA_CHUNKS(3, 1); + -+FNNAME (INSN) -+{ -+ /* Basic test: v2=vqshl_n(v1,v), then store the result. */ -+#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#undef CMT ++#define CMT " (chunk 1)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_1, CMT); + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ TEST_ALL_EXTRA_CHUNKS(3, 2); + -+#define TEST_VQSHL_N(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#undef CMT ++#define CMT " (chunk 2)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, 
poly, 8, 8, PRIx8, expected_st3_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_2, CMT); + -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ /* Check vst4_lane/vst4q_lane. */ + clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VST4_LANE/VST4Q_LANE" ++ TEST_ALL_VSTX_LANE(4); + -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+ -+ /* Choose shift amount arbitrarily. */ -+#define CMT "" -+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat, CMT); -+ -+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat, CMT); ++#undef CMT ++#define CMT " (chunk 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_0, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); 
-+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ TEST_ALL_EXTRA_CHUNKS(4, 1); + ++#undef CMT ++#define CMT " (chunk 1)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_1, CMT); + -+ /* Fill input vector with max value, to check saturation on limits. */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, , uint, u, 8, 8, 0xFF); -+ VDUP(vector, , uint, u, 16, 4, 0xFFFF); -+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); -+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 8, 16, 0xFF); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ TEST_ALL_EXTRA_CHUNKS(4, 2); + +#undef CMT -+#define CMT " (with max input)" -+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat_max, CMT); -+ -+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat_max, CMT); ++#define CMT " (chunk 2)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_2, CMT); ++ 
CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_2, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max, CMT); ++ TEST_ALL_EXTRA_CHUNKS(4, 3); ++ ++#undef CMT ++#define CMT " (chunk 3)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_3, CMT); +} + +int main (void) +{ -+ exec_vqshl_n (); ++ exec_vstX_lane (); + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c +@@ -18,10 +18,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xdc, 0xdd, 0xde, 0xdf, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffd2, 0xffd3, 0xffd4, 0xffd5 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffc8, 0xffffffc9 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffee }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xfa, 0xfb, 0xfc, 0xfd, + 0xfe, 0xff, 0x0, 0x1, + 0x2, 0x3, 
0x4, 0x5, +@@ -41,14 +37,6 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffb9, 0xffffffba, + 0xffffffbb, 0xffffffbc }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed, + 0xffffffffffffffee }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubl.c +@@ -6,43 +6,13 @@ + #define TEST_MSG "VSUBL" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, + 0x1, 0x2, 0x3, 0x4 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, + 0x1, 0x2, 0x3, 0x4 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x1, 0x2 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXl.inc" +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubw.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubw.c +@@ -6,45 +6,15 @@ + #define TEST_MSG "VSUBW" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, + 0x1, 0x2, 0x3, 0x4 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfefd, 0xfefe, 0xfeff, 0xff00, + 0xff01, 0xff02, 0xff03, 0xff04 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffeffff, 0xffff0000, + 0xffff0001, 0xffff0002 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffff00000000, + 0xffffffff00000001 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXw.inc" --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshlu_n.c -@@ -0,0 +1,263 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c +@@ -0,0 +1,289 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag with negative -+ input. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; -+ -+/* Expected results with negative input. 
*/ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x0, 0x0 }; ++/* Expected results for vtbl1. */ ++VECT_VAR_DECL(expected_vtbl1,int,8,8) [] = { 0x0, 0xf2, 0xf2, 0xf2, ++ 0x0, 0x0, 0xf2, 0xf2 }; ++VECT_VAR_DECL(expected_vtbl1,uint,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, ++ 0x0, 0x0, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbl1,poly,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, ++ 0x0, 0x0, 0xf3, 0xf3 }; ++ ++/* Expected results for vtbl2. */ ++VECT_VAR_DECL(expected_vtbl2,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, ++ 0x0, 0x0, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbl2,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0x0, 0x0, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbl2,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0x0, 0x0, 0xf5, 0xf5 }; ++ ++/* Expected results for vtbl3. */ ++VECT_VAR_DECL(expected_vtbl3,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, ++ 0xff, 0x0, 0xf4, 0xf4 }; ++VECT_VAR_DECL(expected_vtbl3,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0x0, 0xf7, 0xf7 }; ++VECT_VAR_DECL(expected_vtbl3,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0x0, 0xf7, 0xf7 }; ++ ++/* Expected results for vtbl4. */ ++VECT_VAR_DECL(expected_vtbl4,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, ++ 0x3, 0x0, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbl4,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0x0, 0xf9, 0xf9 }; ++VECT_VAR_DECL(expected_vtbl4,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0x0, 0xf9, 0xf9 }; ++ ++/* Expected results for vtbx1. */ ++VECT_VAR_DECL(expected_vtbx1,int,8,8) [] = { 0x33, 0xf2, 0xf2, 0xf2, ++ 0x33, 0x33, 0xf2, 0xf2 }; ++VECT_VAR_DECL(expected_vtbx1,uint,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, ++ 0xcc, 0xcc, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbx1,poly,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, ++ 0xcc, 0xcc, 0xf3, 0xf3 }; ++ ++/* Expected results for vtbx2. */ ++VECT_VAR_DECL(expected_vtbx2,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, ++ 0x33, 0x33, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbx2,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0xcc, 0xcc, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbx2,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0xcc, 0xcc, 0xf5, 0xf5 }; ++ ++/* Expected results for vtbx3. */ ++VECT_VAR_DECL(expected_vtbx3,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, ++ 0xff, 0x33, 0xf4, 0xf4 }; ++VECT_VAR_DECL(expected_vtbx3,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0xcc, 0xf7, 0xf7 }; ++VECT_VAR_DECL(expected_vtbx3,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0xcc, 0xf7, 0xf7 }; ++ ++/* Expected results for vtbx4. */ ++VECT_VAR_DECL(expected_vtbx4,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, ++ 0x3, 0x33, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbx4,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0xcc, 0xf9, 0xf9 }; ++VECT_VAR_DECL(expected_vtbx4,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0xcc, 0xf9, 0xf9 }; + -+/* Expected values of cumulative_saturation flag with shift by 1. 
*/ -+int VECT_VAR(expected_cumulative_sat_sh1,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,64,2) = 0; ++void exec_vtbX (void) ++{ ++ int i; + -+/* Expected results with shift by 1. */ -+VECT_VAR_DECL(expected_sh1,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe }; -+VECT_VAR_DECL(expected_sh1,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; -+VECT_VAR_DECL(expected_sh1,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_sh1,uint,64,1) [] = { 0xfffffffffffffffe }; -+VECT_VAR_DECL(expected_sh1,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe }; -+VECT_VAR_DECL(expected_sh1,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe, -+ 0xfffe, 0xfffe, 0xfffe, 0xfffe }; -+VECT_VAR_DECL(expected_sh1,uint,32,4) [] = { 0xfffffffe, 0xfffffffe, -+ 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_sh1,uint,64,2) [] = { 0xfffffffffffffffe, -+ 0xfffffffffffffffe }; ++ /* In this case, input variables are arrays of vectors. */ ++#define DECL_VTBX(T1, W, N, X) \ ++ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(table_vector, T1, W, N, X) ++ ++ /* The vtbl1 variant is different from vtbl{2,3,4} because it takes a ++ vector as 1st param, instead of an array of vectors. */ ++#define TEST_VTBL1(T1, T2, T3, W, N) \ ++ VECT_VAR(table_vector, T1, W, N) = \ ++ vld1##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbl1_##T2##W(VECT_VAR(table_vector, T1, W, N), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++#define TEST_VTBLX(T1, T2, T3, W, N, X) \ ++ VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ ++ vld##X##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbl##X##_##T2##W(VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++ /* We need to define a lookup table. */ ++ uint8_t lookup_table[32]; ++ ++ DECL_VARIABLE(vector, int, 8, 8); ++ DECL_VARIABLE(vector, uint, 8, 8); ++ DECL_VARIABLE(vector, poly, 8, 8); ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, poly, 8, 8); + -+/* Expected values of cumulative_saturation flag with shift by 2. */ -+int VECT_VAR(expected_cumulative_sat_sh2,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,64,2) = 1; ++ /* For vtbl1. */ ++ DECL_VARIABLE(table_vector, int, 8, 8); ++ DECL_VARIABLE(table_vector, uint, 8, 8); ++ DECL_VARIABLE(table_vector, poly, 8, 8); ++ ++ /* For vtbx*. */ ++ DECL_VARIABLE(default_vector, int, 8, 8); ++ DECL_VARIABLE(default_vector, uint, 8, 8); ++ DECL_VARIABLE(default_vector, poly, 8, 8); ++ ++ /* We need only 8 bits variants. 
*/ ++#define DECL_ALL_VTBLX(X) \ ++ DECL_VTBX(int, 8, 8, X); \ ++ DECL_VTBX(uint, 8, 8, X); \ ++ DECL_VTBX(poly, 8, 8, X) ++ ++#define TEST_ALL_VTBL1() \ ++ TEST_VTBL1(int, s, int, 8, 8); \ ++ TEST_VTBL1(uint, u, uint, 8, 8); \ ++ TEST_VTBL1(poly, p, uint, 8, 8) ++ ++#define TEST_ALL_VTBLX(X) \ ++ TEST_VTBLX(int, s, int, 8, 8, X); \ ++ TEST_VTBLX(uint, u, uint, 8, 8, X); \ ++ TEST_VTBLX(poly, p, uint, 8, 8, X) ++ ++ /* Declare the temporary buffers / variables. */ ++ DECL_ALL_VTBLX(2); ++ DECL_ALL_VTBLX(3); ++ DECL_ALL_VTBLX(4); ++ ++ /* Fill the lookup table. */ ++ for (i=0; i<32; i++) { ++ lookup_table[i] = i-15; ++ } + -+/* Expected results with shift by 2. */ -+VECT_VAR_DECL(expected_sh2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_sh2,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_sh2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_sh2,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_sh2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_sh2,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_sh2,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_sh2,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ /* Choose init value arbitrarily, will be used as table index. */ ++ VDUP(vector, , int, s, 8, 8, 1); ++ VDUP(vector, , uint, u, 8, 8, 2); ++ VDUP(vector, , poly, p, 8, 8, 2); + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++ /* To ensure coverage, add some indexes larger than 8,16 and 32 ++ except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */ ++ VSET_LANE(vector, , int, s, 8, 8, 0, 10); ++ VSET_LANE(vector, , int, s, 8, 8, 4, 20); ++ VSET_LANE(vector, , int, s, 8, 8, 5, 40); ++ VSET_LANE(vector, , uint, u, 8, 8, 0, 10); ++ VSET_LANE(vector, , uint, u, 8, 8, 4, 20); ++ VSET_LANE(vector, , uint, u, 8, 8, 5, 40); ++ VSET_LANE(vector, , poly, p, 8, 8, 0, 10); ++ VSET_LANE(vector, , poly, p, 8, 8, 4, 20); ++ VSET_LANE(vector, , poly, p, 8, 8, 5, 40); + -+/* Expected results. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0, -+ 0xa0, 0xa0, 0xa0, 0xa0, -+ 0xa0, 0xa0, 0xa0, 0xa0, -+ 0xa0, 0xa0, 0xa0, 0xa0 }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180, -+ 0x180, 0x180, 0x180, 0x180 }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 }; + ++ /* Check vtbl1. 
*/ ++ clean_results (); ++#define TEST_MSG "VTBL1" ++ TEST_ALL_VTBL1(); + -+#define INSN vqshlu -+#define TEST_MSG "VQSHLU_N/VQSHLUQ_N" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl1, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl1, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl1, ""); + -+#define FNNAME1(NAME) void exec_ ## NAME ## _n(void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ /* Check vtbl2. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBL2" ++ TEST_ALL_VTBLX(2); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl2, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl2, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl2, ""); + -+FNNAME (INSN) -+{ -+ /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ -+#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \ -+ VECT_VAR(vector_res, T3, W, N) = \ -+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ -+ VECT_VAR(vector_res, T3, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Check vtbl3. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBL3" ++ TEST_ALL_VTBLX(3); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl3, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl3, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl3, ""); + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Check vtbl4. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBL4" ++ TEST_ALL_VTBLX(4); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl4, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl4, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl4, ""); ++ ++ ++ /* Now test VTBX. */ ++ ++ /* The vtbx1 variant is different from vtbx{2,3,4} because it takes a ++ vector as 1st param, instead of an array of vectors. */ ++#define TEST_VTBX1(T1, T2, T3, W, N) \ ++ VECT_VAR(table_vector, T1, W, N) = \ ++ vld1##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbx1_##T2##W(VECT_VAR(default_vector, T1, W, N), \ ++ VECT_VAR(table_vector, T1, W, N), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++#define TEST_VTBXX(T1, T2, T3, W, N, X) \ ++ VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ ++ vld##X##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbx##X##_##T2##W(VECT_VAR(default_vector, T1, W, N), \ ++ VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++#define TEST_ALL_VTBX1() \ ++ TEST_VTBX1(int, s, int, 8, 8); \ ++ TEST_VTBX1(uint, u, uint, 8, 8); \ ++ TEST_VTBX1(poly, p, uint, 8, 8) ++ ++#define TEST_ALL_VTBXX(X) \ ++ TEST_VTBXX(int, s, int, 8, 8, X); \ ++ TEST_VTBXX(uint, u, uint, 8, 8, X); \ ++ TEST_VTBXX(poly, p, uint, 8, 8, X) ++ ++ /* Choose init value arbitrarily, will be used as default value. 
*/ ++ VDUP(default_vector, , int, s, 8, 8, 0x33); ++ VDUP(default_vector, , uint, u, 8, 8, 0xCC); ++ VDUP(default_vector, , poly, p, 8, 8, 0xCC); + -+#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Check vtbx1. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX1" ++ TEST_ALL_VTBX1(); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx1, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx1, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx1, ""); + ++ /* Check vtbx2. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX2" ++ TEST_ALL_VTBXX(2); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx2, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx2, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx2, ""); + -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ /* Check vtbx3. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX3" ++ TEST_ALL_VTBXX(3); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx3, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx3, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx3, ""); + ++ /* Check vtbx4. */ + clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX4" ++ TEST_ALL_VTBXX(4); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx4, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx4, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx4, ""); ++} + -+ /* Fill input vector with negative values, to check saturation on -+ limits. */ -+ VDUP(vector, , int, s, 8, 8, -1); -+ VDUP(vector, , int, s, 16, 4, -2); -+ VDUP(vector, , int, s, 32, 2, -3); -+ VDUP(vector, , int, s, 64, 1, -4); -+ VDUP(vector, q, int, s, 8, 16, -1); -+ VDUP(vector, q, int, s, 16, 8, -2); -+ VDUP(vector, q, int, s, 32, 4, -3); -+ VDUP(vector, q, int, s, 64, 2, -4); ++int main (void) ++{ ++ exec_vtbX (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c +@@ -8,12 +8,10 @@ VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf1, 0x11, 0x11, + 0xf2, 0xf3, 0x11, 0x11 }; + VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff1, 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +@@ -25,8 +23,6 @@ VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, + VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff1, 0x22, 0x22, + 0xfff2, 0xfff3, 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 }; +-VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55, + 0xf4, 0xf5, 0x55, 0x55, +@@ -34,8 +30,6 @@ VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, + VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 
0xfff1, 0x66, 0x66, + 0xfff2, 0xfff3, 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 }; +-VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55, + 0xf4, 0xf5, 0x55, 0x55, +@@ -50,12 +44,10 @@ VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf4, 0xf5, 0x11, 0x11, + 0xf6, 0xf7, 0x11, 0x11 }; + VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff2, 0xfff3, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, + 0xf6, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, + 0xf6, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +@@ -67,8 +59,6 @@ VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, + VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff4, 0xfff5, 0x22, 0x22, + 0xfff6, 0xfff7, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, + 0xfa, 0xfb, 0x55, 0x55, + 0xfc, 0xfd, 0x55, 0x55, +@@ -76,8 +66,6 @@ VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, + VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, + 0xfff6, 0xfff7, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, + 0xfa, 0xfb, 0x55, 0x55, + 0xfc, 0xfd, 0x55, 0x55, +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtst.c +@@ -0,0 +1,120 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Choose shift amount arbitrarily. */ -+#define CMT " (negative input)" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_neg, CMT); ++/* Expected results with signed input. 
*/ ++VECT_VAR_DECL(expected_signed,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_signed,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; ++VECT_VAR_DECL(expected_signed,uint,32,2) [] = { 0x0, 0xffffffff }; ++VECT_VAR_DECL(expected_signed,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_signed,uint,16,8) [] = { 0x0, 0xffff, 0x0, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_signed,uint,32,4) [] = { 0x0, 0xffffffff, ++ 0x0, 0xffffffff }; + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++/* Expected results with unsigned input. */ ++VECT_VAR_DECL(expected_unsigned,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_unsigned,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; ++VECT_VAR_DECL(expected_unsigned,uint,32,2) [] = { 0x0, 0xffffffff }; ++VECT_VAR_DECL(expected_unsigned,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_unsigned,uint,16,8) [] = { 0x0, 0xffff, ++ 0x0, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_unsigned,uint,32,4) [] = { 0x0, 0xffffffff, ++ 0x0, 0xffffffff }; + -+ -+ /* Fill input vector with max value, to check saturation on -+ limits. */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); ++#ifndef INSN_NAME ++#define INSN_NAME vtst ++#define TEST_MSG "VTST/VTSTQ" ++#endif + -+ /* shift by 1. */ -+#undef CMT -+#define CMT " (shift by 1)" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1, expected_cumulative_sat_sh1, CMT); ++/* We can't use the standard ref_v_binary_op.c template because vtst ++ has no 64 bits variant, and outputs are always of uint type. 
*/ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh1, CMT); ++FNNAME (INSN_NAME) ++{ ++ /* Basic test: y=OP(x,x), then store the result. */ ++#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, uint, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \ ++ VECT_VAR(vector_res, uint, W, N)) + -+ /* shift by 2 to force saturation. */ -+#undef CMT -+#define CMT " (shift by 2)" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_sh2, CMT); ++#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ ++ TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh2, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector2); ++ DECL_VARIABLE_UNSIGNED_VARIANTS(vector_res); + -+ -+ /* Fill input vector with positive values, to check normal case. */ -+ VDUP(vector, , int, s, 8, 8, 1); -+ VDUP(vector, , int, s, 16, 4, 2); -+ VDUP(vector, , int, s, 32, 2, 3); -+ VDUP(vector, , int, s, 64, 1, 4); -+ VDUP(vector, q, int, s, 8, 16, 5); -+ VDUP(vector, q, int, s, 16, 8, 6); -+ VDUP(vector, q, int, s, 32, 4, 7); -+ VDUP(vector, q, int, s, 64, 2, 8); + -+ /* Arbitrary shift amount. */ ++ clean_results (); ++ ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose init value arbitrarily, will be used as comparison ++ value. 
*/ ++ VDUP(vector2, , int, s, 8, 8, 15); ++ VDUP(vector2, , int, s, 16, 4, 5); ++ VDUP(vector2, , int, s, 32, 2, 1); ++ VDUP(vector2, , uint, u, 8, 8, 15); ++ VDUP(vector2, , uint, u, 16, 4, 5); ++ VDUP(vector2, , uint, u, 32, 2, 1); ++ VDUP(vector2, q, int, s, 8, 16, 15); ++ VDUP(vector2, q, int, s, 16, 8, 5); ++ VDUP(vector2, q, int, s, 32, 4, 1); ++ VDUP(vector2, q, uint, u, 8, 16, 15); ++ VDUP(vector2, q, uint, u, 16, 8, 5); ++ VDUP(vector2, q, uint, u, 32, 4, 1); ++ ++#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR, T1, T2) \ ++ MACRO(VAR, , T1, T2, 8, 8); \ ++ MACRO(VAR, , T1, T2, 16, 4); \ ++ MACRO(VAR, , T1, T2, 32, 2); \ ++ MACRO(VAR, q, T1, T2, 8, 16); \ ++ MACRO(VAR, q, T1, T2, 16, 8); \ ++ MACRO(VAR, q, T1, T2, 32, 4) ++ ++ /* Split the test, as both signed and unsigned variants output their ++ result in an unsigned form (thus the same output variable is used ++ in these tests). */ ++ TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, int, s); ++ ++#define CMT " (signed input)" ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_signed, CMT); ++ ++ TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, uint, u); ++ +#undef CMT -+#define CMT "" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8, expected_cumulative_sat, CMT); -+ -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++#define CMT " (unsigned input)" ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_unsigned, CMT); +} + +int main (void) +{ -+ exec_vqshlu_n (); ++ exec_vtst (); + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c +@@ -9,14 +9,12 @@ VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf1, 
0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, + 0xfffffff1 }; +-VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, +@@ -32,8 +30,6 @@ VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff1, + 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -44,8 +40,6 @@ VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff1, + 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -62,12 +56,10 @@ VECT_VAR_DECL(expected1,int,8,8) [] = { 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11 }; + VECT_VAR_DECL(expected1,int,16,4) [] = { 0x22, 0x22, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,8) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,uint,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +@@ -79,8 +71,6 @@ VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, + VECT_VAR_DECL(expected1,int,16,8) [] = { 0x22, 0x22, 0x22, 0x22, + 0x22, 0x22, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,4) [] = { 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, +@@ -88,8 +78,6 @@ VECT_VAR_DECL(expected1,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55, + VECT_VAR_DECL(expected1,uint,16,8) [] = { 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,4) [] = { 0x77, 0x77, 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c +@@ -9,13 +9,11 @@ VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf4, 0x11, 0x11, + VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff2, + 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, + 0xf1, 0xf5, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff2, + 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 
0xfffffff1 }; +-VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, + 0xf1, 0xf5, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2, +@@ -29,8 +27,6 @@ VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff4, 0x22, 0x22, + 0xfff1, 0xfff5, 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff2, + 0x33, 0x33 }; +-VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, + 0xf1, 0xf9, 0x55, 0x55, + 0xf2, 0xfa, 0x55, 0x55, +@@ -39,8 +35,6 @@ VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, + 0xfff1, 0xfff5, 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff2, + 0x77, 0x77 }; +-VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, + 0xf1, 0xf9, 0x55, 0x55, + 0xf2, 0xfa, 0x55, 0x55, +@@ -56,13 +50,11 @@ VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf2, 0xf6, 0x11, 0x11, + VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff3, + 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, + 0xf3, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff3, + 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, + 0xf3, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3, +@@ -76,8 +68,6 @@ VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff2, 0xfff6, 0x22, 0x22, + 0xfff3, 0xfff7, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff3, + 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, + 0xf5, 0xfd, 0x55, 0x55, + 0xf6, 0xfe, 0x55, 0x55, +@@ -86,8 +76,6 @@ VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, + 0xfff3, 0xfff7, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff3, + 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, + 0xf5, 0xfd, 0x55, 0x55, + 0xf6, 0xfe, 0x55, 0x55, --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_n.c -@@ -0,0 +1,177 @@ -+#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" ++++ b/src/gcc/testsuite/gcc.target/aarch64/arm_align_max_pwr.c +@@ -0,0 +1,15 @@ ++/* { dg-do run } */ + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++#include ++#include + -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, -+ 0xfa, 0xfa, 0xfb, 0xfb }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++#define align (1ul << __ARM_ALIGN_MAX_PWR) ++static int x __attribute__ ((aligned (align))); + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by 3. */ -+int VECT_VAR(expected_cumulative_sat_max_sh3,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,64,2) = 1; ++int ++main () ++{ ++ assert ((((unsigned long)&x) & (align - 1)) == 0); + -+/* Expected results with max input value shifted by 3. */ -+VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/arm_align_max_stack_pwr.c +@@ -0,0 +1,15 @@ ++/* { dg-do run } */ + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by type size. */ -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,64,2) = 0; ++#include ++#include + -+/* Expected results with max input value shifted by type size. 
*/ -+VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++#define align (1ul << __ARM_ALIGN_MAX_STACK_PWR) + -+#define INSN vqshrn_n -+#define TEST_MSG "VQSHRN_N" ++int ++main () ++{ ++ int x __attribute__ ((aligned (align))); + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ assert ((((unsigned long)&x) & (align - 1)) == 0); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-cas.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+FNNAME (INSN) -+{ -+ /* Basic test: y=vqshrn_n(x,v), then store the result. */ -+#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ -+ VECT_VAR(vector_res, T1, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ -+ VECT_VAR(vector_res, T1, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Test ARMv8.1-A CAS instruction. */ + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++#include "atomic-inst-ops.inc" + -+#define TEST_VQSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++#define TEST TEST_TWO + ++#define CAS_ATOMIC(FN, TY, MODEL1, MODEL2) \ ++ int FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \ ++ { \ ++ int model_s = MODEL1; \ ++ int model_f = MODEL2; \ ++ /* The success memory ordering must be at least as strong as \ ++ the failure memory ordering. */ \ ++ if (model_s < model_f) \ ++ return 0; \ ++ /* Ignore invalid memory orderings. */ \ ++ if (model_f == __ATOMIC_RELEASE || model_f == __ATOMIC_ACQ_REL) \ ++ return 0; \ ++ return __atomic_compare_exchange_n (val, foo, bar, 0, model_s, model_f); \ ++ } + -+ /* vector is twice as large as vector_res. */ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); -+ DECL_VARIABLE(vector, uint, 16, 8); -+ DECL_VARIABLE(vector, uint, 32, 4); -+ DECL_VARIABLE(vector, uint, 64, 2); ++#define CAS_ATOMIC_NORETURN(FN, TY, MODEL1, MODEL2) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \ ++ { \ ++ int model_s = MODEL1; \ ++ int model_f = MODEL2; \ ++ /* The success memory ordering must be at least as strong as \ ++ the failure memory ordering. */ \ ++ if (model_s < model_f) \ ++ return; \ ++ /* Ignore invalid memory orderings. 
*/ \ ++ if (model_f == __ATOMIC_RELEASE || model_f == __ATOMIC_ACQ_REL) \ ++ return; \ ++ __atomic_compare_exchange_n (val, foo, bar, 0, model_s, model_f); \ ++ } + -+ DECL_VARIABLE(vector_res, int, 8, 8); -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++TEST (cas_atomic, CAS_ATOMIC) ++TEST (cas_atomic_noreturn, CAS_ATOMIC_NORETURN) + -+ clean_results (); + -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); -+ VLOAD(vector, buffer, q, int, s, 64, 2); -+ VLOAD(vector, buffer, q, uint, u, 16, 8); -+ VLOAD(vector, buffer, q, uint, u, 32, 4); -+ VLOAD(vector, buffer, q, uint, u, 64, 2); ++/* { dg-final { scan-assembler-times "casb\t" 4} } */ ++/* { dg-final { scan-assembler-times "casab\t" 20} } */ ++/* { dg-final { scan-assembler-times "caslb\t" 4} } */ ++/* { dg-final { scan-assembler-times "casalb\t" 36} } */ ++ ++/* { dg-final { scan-assembler-times "cash\t" 4} } */ ++/* { dg-final { scan-assembler-times "casah\t" 20} } */ ++/* { dg-final { scan-assembler-times "caslh\t" 4} } */ ++/* { dg-final { scan-assembler-times "casalh\t" 36} } */ ++ ++/* { dg-final { scan-assembler-times "cas\t" 8} } */ ++/* { dg-final { scan-assembler-times "casa\t" 40} } */ ++/* { dg-final { scan-assembler-times "casl\t" 8} } */ ++/* { dg-final { scan-assembler-times "casal\t" 72} } */ ++ ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c +@@ -0,0 +1,87 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+ /* Choose shift amount arbitrarily. */ -+#define CMT "" -+ TEST_VQSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); ++/* Test ARMv8.1-A Load-ADD instruction. */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++#include "atomic-inst-ops.inc" + ++#define TEST TEST_ONE + -+ /* Use max possible value as input. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++#define LOAD_ADD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_add (val, foo, MODEL); \ ++ } + -+#undef CMT -+#define CMT " (check saturation: shift by 3)" -+ TEST_VQSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); ++#define LOAD_ADD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_add (val, foo, MODEL); \ ++ } + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT); ++#define LOAD_SUB(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_sub (val, foo, MODEL); \ ++ } + ++#define LOAD_SUB_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_sub (val, foo, MODEL); \ ++ } + -+#undef CMT -+#define CMT " (check saturation: shift by max)" -+ TEST_VQSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); ++#define ADD_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_add_fetch (val, foo, MODEL); \ ++ } + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); -+} ++#define ADD_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_add_fetch (val, foo, MODEL); \ ++ } + -+int main (void) -+{ -+ exec_vqshrn_n (); -+ return 0; -+} ++#define SUB_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_sub_fetch (val, foo, MODEL); \ ++ } ++ ++#define SUB_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_sub_fetch (val, foo, MODEL); \ ++ } ++ ++TEST (load_add, LOAD_ADD) ++TEST (load_add_notreturn, LOAD_ADD_NORETURN) ++ ++TEST (load_sub, LOAD_SUB) ++TEST (load_sub_notreturn, LOAD_SUB_NORETURN) ++ ++TEST (add_load, ADD_LOAD) 
++TEST (add_load_notreturn, ADD_LOAD_NORETURN) ++ ++TEST (sub_load, SUB_LOAD) ++TEST (sub_load_notreturn, SUB_LOAD_NORETURN) ++ ++/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddab\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */ ++ ++/* { dg-final { scan-assembler-times "ldaddh\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddah\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */ ++ ++/* { dg-final { scan-assembler-times "ldadd\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldadda\t" 64} } */ ++/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldaddal\t" 64} } */ ++ ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_n.c -@@ -0,0 +1,133 @@ -+#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c +@@ -0,0 +1,155 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+/* Expected values of cumulative_saturation flag with negative input. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; ++/* Test ARMv8.1-A LD instruction. */ ++ ++#include "atomic-inst-ops.inc" ++ ++#define TEST TEST_ONE ++ ++#define LOAD_OR(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_or (val, foo, MODEL); \ ++ } ++ ++#define LOAD_OR_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_or (val, foo, MODEL); \ ++ } ++ ++#define LOAD_AND(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_and (val, foo, MODEL); \ ++ } ++ ++#define LOAD_AND_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_and (val, foo, MODEL); \ ++ } ++ ++#define LOAD_XOR(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_xor (val, foo, MODEL); \ ++ } ++ ++#define LOAD_XOR_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_xor (val, foo, MODEL); \ ++ } ++ ++#define OR_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_or_fetch (val, foo, MODEL); \ ++ } ++ ++#define OR_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_or_fetch (val, foo, MODEL); \ ++ } ++ ++#define AND_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_and_fetch (val, foo, MODEL); \ ++ } ++ ++#define AND_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_and_fetch (val, foo, MODEL); \ ++ } ++ ++#define XOR_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_xor_fetch (val, foo, MODEL); \ ++ } ++ ++#define XOR_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_xor_fetch (val, foo, MODEL); \ ++ } ++ ++ ++TEST (load_or, LOAD_OR) ++TEST (load_or_notreturn, LOAD_OR_NORETURN) ++ ++TEST 
(load_and, LOAD_AND) ++TEST (load_and_notreturn, LOAD_AND_NORETURN) ++ ++TEST (load_xor, LOAD_XOR) ++TEST (load_xor_notreturn, LOAD_XOR_NORETURN) ++ ++TEST (or_load, OR_LOAD) ++TEST (or_load_notreturn, OR_LOAD_NORETURN) ++ ++TEST (and_load, AND_LOAD) ++TEST (and_load_notreturn, AND_LOAD_NORETURN) ++ ++TEST (xor_load, XOR_LOAD) ++TEST (xor_load_notreturn, XOR_LOAD_NORETURN) + -+/* Expected results with negative input. */ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by 1. */ -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++/* Load-OR. */ + -+/* Expected results with max input value shifted by 1. */ -+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetab\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */ + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++/* { dg-final { scan-assembler-times "ldseth\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetah\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */ + -+/* Expected results. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48, -+ 0x48, 0x48, 0x48, 0x48 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe }; ++/* { dg-final { scan-assembler-times "ldset\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldseta\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldsetal\t" 32} } */ + ++/* Load-AND. */ + -+#define INSN vqshrun_n -+#define TEST_MSG "VQSHRUN_N" ++/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclrab\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclralb\t" 16} } */ + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclrah\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclralh\t" 16} } */ + -+FNNAME (INSN) -+{ -+ /* Basic test: y=vqshrun_n(x,v), then store the result. 
*/ -+#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ -+ VECT_VAR(vector_res, uint, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ -+ VECT_VAR(vector_res, uint, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-final { scan-assembler-times "ldclr\t" 16} */ ++/* { dg-final { scan-assembler-times "ldclra\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldclral\t" 32} } */ + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Load-XOR. */ + -+#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeorab\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */ + ++/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeorah\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */ + -+ /* vector is twice as large as vector_res. */ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); ++/* { dg-final { scan-assembler-times "ldeor\t" 16} */ ++/* { dg-final { scan-assembler-times "ldeora\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldeoral\t" 32} } */ + -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-ops.inc +@@ -0,0 +1,66 @@ ++/* Support code for atomic instruction tests. */ ++ ++/* Define types names without spaces. */ ++typedef unsigned char uchar; ++typedef unsigned short ushort; ++typedef unsigned int uint; ++typedef long long longlong; ++typedef unsigned long long ulonglong; ++typedef __int128_t int128; ++typedef __uint128_t uint128; ++ ++#define FNNAME(NAME,TY) NAME ++ ++/* Expand one-model functions. */ ++#define TEST_M1(NAME, FN, TY, MODEL, DUMMY) \ ++ FN (test_##NAME##_##TY, TY, MODEL) ++ ++/* Expand two-model functions. */ ++#define TEST_M2(NAME, FN, TY, MODEL1, MODEL2) \ ++ FN (test_##NAME##_##TY, TY, MODEL1, MODEL2) ++ ++/* Typest to test. */ ++#define TEST_TY(NAME, FN, N, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, char, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, uchar, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, short, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, ushort, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, int, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, uint, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, longlong, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, ulonglong, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, int128, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, uint128, MODEL1, MODEL2) ++ ++/* Models to test. 
*/ ++#define TEST_MODEL(NAME, FN, N) \ ++ TEST_TY (NAME##_relaxed, FN, N, __ATOMIC_RELAXED, DUMMY) \ ++ TEST_TY (NAME##_consume, FN, N, __ATOMIC_CONSUME, DUMMY) \ ++ TEST_TY (NAME##_acquire, FN, N, __ATOMIC_ACQUIRE, DUMMY) \ ++ TEST_TY (NAME##_release, FN, N, __ATOMIC_RELEASE, DUMMY) \ ++ TEST_TY (NAME##_acq_rel, FN, N, __ATOMIC_ACQ_REL, DUMMY) \ ++ TEST_TY (NAME##_seq_cst, FN, N, __ATOMIC_SEQ_CST, DUMMY) \ ++ ++/* Cross-product of models to test. */ ++#define TEST_MODEL_M1(NAME, FN, N, M) \ ++ TEST_TY (NAME##_relaxed, FN, N, M, __ATOMIC_RELAXED) \ ++ TEST_TY (NAME##_consume, FN, N, M, __ATOMIC_CONSUME) \ ++ TEST_TY (NAME##_acquire, FN, N, M, __ATOMIC_ACQUIRE) \ ++ TEST_TY (NAME##_release, FN, N, M, __ATOMIC_RELEASE) \ ++ TEST_TY (NAME##_acq_rel, FN, N, M, __ATOMIC_ACQ_REL) \ ++ TEST_TY (NAME##_seq_cst, FN, N, M, __ATOMIC_SEQ_CST) \ ++ ++#define TEST_MODEL_M2(NAME, FN) \ ++ TEST_MODEL_M1 (NAME##_relaxed, FN, 2, __ATOMIC_RELAXED) \ ++ TEST_MODEL_M1 (NAME##_consume, FN, 2, __ATOMIC_CONSUME) \ ++ TEST_MODEL_M1 (NAME##_acquire, FN, 2, __ATOMIC_ACQUIRE) \ ++ TEST_MODEL_M1 (NAME##_release, FN, 2, __ATOMIC_RELEASE) \ ++ TEST_MODEL_M1 (NAME##_acq_rel, FN, 2, __ATOMIC_ACQ_REL) \ ++ TEST_MODEL_M1 (NAME##_seq_cst, FN, 2, __ATOMIC_SEQ_CST) \ + -+ clean_results (); ++/* Expand functions for a cross-product of memory models and types. */ ++#define TEST_TWO(NAME, FN) TEST_MODEL_M2 (NAME, FN) + -+ /* Fill input vector with negative values, to check saturation on -+ limits. */ -+ VDUP(vector, q, int, s, 16, 8, -2); -+ VDUP(vector, q, int, s, 32, 4, -3); -+ VDUP(vector, q, int, s, 64, 2, -4); ++/* Expand functions for a set of memory models and types. */ ++#define TEST_ONE(NAME, FN) TEST_MODEL (NAME, FN, 1) + -+ /* Choose shift amount arbitrarily. */ -+#define CMT " (negative input)" -+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-swp.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++/* Test ARMv8.1-A SWP instruction. */ + -+ -+ /* Fill input vector with max value, to check saturation on -+ limits. */ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++#include "atomic-inst-ops.inc" + -+#undef CMT -+#define CMT " (check cumulative saturation)" -+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT); ++#define TEST TEST_ONE + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++#define SWAP_ATOMIC(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY foo) \ ++ { \ ++ return __atomic_exchange_n (val, foo, MODEL); \ ++ } + -+ -+ /* Fill input vector with positive values, to check normal case. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x1234); -+ VDUP(vector, q, int, s, 32, 4, 0x87654321); -+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); ++#define SWAP_ATOMIC_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \ ++ { \ ++ __atomic_exchange (val, foo, bar, MODEL); \ ++ } + -+#undef CMT -+#define CMT "" -+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT); -+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT); -+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT); + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+} ++TEST (swap_atomic, SWAP_ATOMIC) ++TEST (swap_atomic_noreturn, SWAP_ATOMIC_NORETURN) + -+int main (void) -+{ -+ exec_vqshrun_n (); -+ return 0; -+} ++ ++/* { dg-final { scan-assembler-times "swpb\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpab\t" 8} } */ ++/* { dg-final { scan-assembler-times "swplb\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpalb\t" 8} } */ ++ ++/* { dg-final { scan-assembler-times "swph\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpah\t" 8} } */ ++/* { dg-final { scan-assembler-times "swplh\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpalh\t" 8} } */ ++ ++/* { dg-final { scan-assembler-times "swp\t" 8} } */ ++/* { dg-final { scan-assembler-times "swpa\t" 16} } */ ++/* { dg-final { scan-assembler-times "swpl\t" 8} } */ ++/* { dg-final { scan-assembler-times "swpal\t" 16} } */ ++ ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/aarch64/c-output-template-4.c @@ -0,0 +1,10 @@ @@ -11272,47 +30700,199 @@ + +/* { dg-final { scan-assembler "@ test\\+4" } } */ --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/pow-sqrt-synth-1.c -@@ -0,0 +1,38 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-mem.c +@@ -0,0 +1,10 @@ +/* { dg-do compile } */ -+/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ ++/* { dg-options "-O2" } */ + ++void ++foo (double *output) ++{ ++ *output = 0.0; ++} + -+double -+foo (double a) ++/* { dg-final { scan-assembler "str\\txzr, \\\[x0\\\]" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-reg.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void bar (double); ++void ++foo (void) +{ -+ return __builtin_pow (a, -5.875); ++ bar (0.0); +} + -+double -+foof (double a) ++/* { dg-final { scan-assembler "fmov\\td0, xzr" } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero.c ++++ b/src//dev/null +@@ -1,10 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O2" } */ +- +-void +-foo (double *output) +-{ +- *output = 0.0; +-} +- +-/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, xzr" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-mem.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void ++foo (float *output) +{ -+ return __builtin_pow (a, 0.75f); ++ *output = 0.0; +} + -+double -+bar (double a) ++/* { dg-final { scan-assembler "str\\twzr, \\\[x0\\\]" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-reg.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void bar (float); ++void ++foo (void) +{ -+ return __builtin_pow (a, 
1.0 + 0.00390625); ++ bar (0.0); +} + -+double -+baz (double a) ++/* { dg-final { scan-assembler "fmov\\ts0, wzr" } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero.c ++++ b/src//dev/null +@@ -1,10 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O2" } */ +- +-void +-foo (float *output) +-{ +- *output = 0.0; +-} +- +-/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, wzr" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovld-zero-mem.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void ++foo (long double *output) +{ -+ return __builtin_pow (a, -1.25) + __builtin_pow (a, 5.75) - __builtin_pow (a, 3.375); ++ *output = 0.0; +} + -+#define N 256 ++/* { dg-final { scan-assembler "stp\\txzr, xzr, \\\[x0\\\]" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovld-zero-reg.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void bar (long double); +void -+vecfoo (double *a) ++foo (void) +{ -+ for (int i = 0; i < N; i++) -+ a[i] = __builtin_pow (a[i], 1.25); ++ bar (0.0); +} + -+/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ -+/* { dg-final { cleanup-tree-dump "sincos" } } */ -\ No newline at end of file ++/* { dg-final { scan-assembler "movi\\tv0\.2d, #0" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/mgeneral-regs_1.c +@@ -0,0 +1,10 @@ ++/* { dg-options "-mgeneral-regs-only" } */ ++ ++typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); ++ ++/* { dg-error "'-mgeneral-regs-only' is incompatible with vector return type" "" {target "aarch64*-*-*"} 7 } */ ++/* { dg-error "'-mgeneral-regs-only' is incompatible with vector argument" "" {target "aarch64*-*-*"} 7 } */ ++int32x2_t test (int32x2_t a, int32x2_t b) ++{ ++ return a + b; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/mgeneral-regs_2.c +@@ -0,0 +1,15 @@ ++/* { dg-options "-mgeneral-regs-only" } */ ++ ++#include ++ ++typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); ++ ++int ++test (int i, ...) ++{ ++ va_list argp; ++ va_start (argp, i); ++ int32x2_t x = (int32x2_t) {0, 1}; ++ x += va_arg (argp, int32x2_t); /* { dg-error "'-mgeneral-regs-only' is incompatible with vector varargs" } */ ++ return x[0] + x[1]; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/mgeneral-regs_3.c +@@ -0,0 +1,11 @@ ++/* { dg-options "-mgeneral-regs-only -O2" } */ ++ ++extern void abort (void); ++ ++int ++test (int i, ...) ++{ ++ float f = (float) i; /* { dg-error "'-mgeneral-regs-only' is incompatible with floating-point code" } */ ++ if (f != f) abort (); ++ return 2; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/neg_abs_1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-save-temps -O2" } */ ++ ++int ++f1 (int x) ++{ ++ return x < 0 ? x : -x; ++} ++ ++long long ++f2 (long long x) ++{ ++ return x < 0 ? x : -x; ++} ++ ++/* { dg-final { scan-assembler-not "\tneg\tw\[0-9\]*.*" } } */ ++/* { dg-final { scan-assembler-not "\tneg\tx\[0-9\]*.*" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/nofp_1.c +@@ -0,0 +1,19 @@ ++/* { dg-skip-if "conflicting -march" { *-*-* } { "-march=*" } { "-march=*+nofp" } } */ ++/* If there are multiple -march's, the latest wins; skip the test either way. ++ -march overrides -mcpu, so there is no possibility of conflict. 
*/ ++ ++/* { dg-options "-march=armv8-a+nofp" } */ ++ ++#include ++ ++typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); ++ ++int test (int i, ...); ++ ++int ++main (int argc, char **argv) ++{ ++ int32x2_t a = (int32x2_t) {0, 1}; ++ int32x2_t b = (int32x2_t) {2, 3}; ++ return test (2, a, b); /* { dg-error "'\\+nofp' feature modifier is incompatible with vector argument" } */ ++} --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/aarch64/pr65491_1.c @@ -0,0 +1,11 @@ @@ -11398,6 +30978,61 @@ + -fomit-frame-pointer which avoids use of stp in the prologue to main(). */ +/* { dg-final { scan-assembler-not "stp\\t" } } */ +/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c +@@ -40,6 +40,7 @@ THING (int16x8_t, 8, int16_t, q_s16) \ + THING (uint16x8_t, 8, uint16_t, q_u16) \ + THING (int32x4_t, 4, int32_t, q_s32) \ + THING (uint32x4_t, 4, uint32_t, q_u32) \ ++THING (float32x4_t, 4, float32_t, q_f32)\ + THING (int64x2_t, 2, int64_t, q_s64) \ + THING (uint64x2_t, 2, uint64_t, q_u64) \ + THING (float64x2_t, 2, float64_t, q_f64) +--- a/src/gcc/testsuite/gcc.target/aarch64/vld1_lane.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/vld1_lane.c +@@ -56,7 +56,7 @@ VARIANTS (TESTMETH) + + #define CHECK(BASE, Q, ELTS, SUFFIX, LANE) \ + if (test_vld1##Q##_lane##SUFFIX ((const BASE##_t *)orig_data, \ +- BASE##_data) != 0) \ ++ & BASE##_data) != 0) \ + abort (); + + int +@@ -65,20 +65,19 @@ main (int argc, char **argv) + /* Original data for all vector formats. */ + uint64_t orig_data[2] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL}; + +- /* Data with which vldN_lane will overwrite some of previous. */ +- uint8_t uint8_data[4] = { 7, 11, 13, 17 }; +- uint16_t uint16_data[4] = { 257, 263, 269, 271 }; +- uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 }; +- uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL, +- 0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; +- int8_t int8_data[4] = { -1, 3, -5, 7 }; +- int16_t int16_data[4] = { 257, -259, 261, -263 }; +- int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 }; +- int64_t *int64_data = (int64_t *)uint64_data; +- poly8_t poly8_data[4] = { 0, 7, 13, 18, }; +- poly16_t poly16_data[4] = { 11111, 2222, 333, 44 }; +- float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; +- float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 }; ++ /* Data with which vld1_lane will overwrite one element of previous. */ ++ uint8_t uint8_data = 7; ++ uint16_t uint16_data = 257; ++ uint32_t uint32_data = 65537; ++ uint64_t uint64_data = 0xdeadbeefcafebabeULL; ++ int8_t int8_data = -1; ++ int16_t int16_data = -259; ++ int32_t int32_data = -987654321; ++ int64_t int64_data = 0x1234567890abcdefLL; ++ poly8_t poly8_data = 13; ++ poly16_t poly16_data = 11111; ++ float32_t float32_data = 3.14159; ++ float64_t float64_data = 1.010010001; + + VARIANTS (CHECK); + return 0; --- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c @@ -54,11 +54,11 @@ test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data, \ @@ -11468,31 +31103,150 @@ + return 0; \ +} + -+/* Tests of vst2_lane and vst2q_lane. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vst3_lane and vst3q_lane. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vst4_lane and vst4q_lane. */ -+VARIANTS (TESTMETH, 4) ++/* Tests of vst2_lane and vst2q_lane. 
*/ ++VARIANTS (TESTMETH, 2) ++/* Tests of vst3_lane and vst3q_lane. */ ++VARIANTS (TESTMETH, 3) ++/* Tests of vst4_lane and vst4q_lane. */ ++VARIANTS (TESTMETH, 4) ++ ++#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ ++ if (test_vst##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data)) \ ++ abort (); ++ ++int ++main (int argc, char **argv) ++{ ++ /* Original data for all vector formats. */ ++ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL, ++ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL, ++ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL, ++ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL}; ++ ++ VARIANTS (CHECK, 2); ++ VARIANTS (CHECK, 3); ++ VARIANTS (CHECK, 4); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_arm-err.c +@@ -0,0 +1,13 @@ ++/* Check that attribute target arm is rejected for M profile. */ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-march=*" } { "-march=armv6-m" } } */ ++/* { dg-add-options arm_arch_v6m } */ ++ ++int __attribute__((target("arm"))) ++foo(int a) ++{ /* { dg-error "does not support" } */ ++ return a ? 1 : 5; ++} ++ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_arm.c +@@ -0,0 +1,11 @@ ++/* Check that attribute target arm is recognized. */ ++/* { dg-do compile } */ ++/* { dg-final { scan-assembler "\\.arm" } } */ ++/* { dg-final { scan-assembler-not "\\.thumb_func" } } */ ++ ++int __attribute__((target("arm"))) ++foo(int a) ++{ ++ return a ? 1 : 5; ++} ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_thumb-static.c +@@ -0,0 +1,25 @@ ++/* Check that a change mode to a static function is correctly handled. */ ++/* { dg-do run } */ ++ ++static void ++ __attribute__((__noinline__)) ++foo (void) ++{ ++ __asm__ (""); ++} ++ ++static void ++__attribute__((__noinline__)) ++__attribute__((target("thumb"))) ++bar (void) ++{ ++ __asm__ (""); ++} ++ ++int ++main (void) ++{ ++ foo(); ++ bar(); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_thumb-static2.c +@@ -0,0 +1,40 @@ ++/* Check interwork between static functions for thumb2. */ + -+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ -+ if (test_vst##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data)) \ -+ abort (); ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arch_v7a_ok } */ ++/* { dg-options "-O0 -march=armv7-a" } */ + -+int -+main (int argc, char **argv) ++struct _NSPoint +{ -+ /* Original data for all vector formats. 
*/ -+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL, -+ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL, -+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL, -+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL}; ++ float x; ++ float y; ++}; + -+ VARIANTS (CHECK, 2); -+ VARIANTS (CHECK, 3); -+ VARIANTS (CHECK, 4); -+ return 0; ++typedef struct _NSPoint NSPoint; ++ ++static NSPoint ++__attribute__ ((target("arm"))) ++NSMakePoint (float x, float y) ++{ ++ NSPoint point; ++ point.x = x; ++ point.y = y; ++ return point; ++} ++ ++static NSPoint ++__attribute__ ((target("thumb"))) ++RelativePoint (NSPoint point, NSPoint refPoint) ++{ ++ return NSMakePoint (refPoint.x + point.x, refPoint.y + point.y); ++} ++ ++NSPoint ++__attribute__ ((target("arm"))) ++g(NSPoint refPoint) ++{ ++ float pointA, pointB; ++ return RelativePoint (NSMakePoint (0, pointA), refPoint); ++} ++ ++/* { dg-final { scan-assembler-times "blx" 2 } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_thumb.c +@@ -0,0 +1,15 @@ ++/* Check that attribute target thumb is recognized. */ ++/* { dg-do compile } */ ++/* Make sure the current multilib supports thumb. */ ++/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ ++/* { dg-options "-O2 -mno-restrict-it" } */ ++/* { dg-final { scan-assembler-not "\\.arm" } } */ ++/* { dg-final { scan-assembler "\\.thumb_func" } } */ ++ ++int __attribute__((target("thumb"))) ++foo(int a) ++{ ++ /* { dg-final { scan-assembler "ite" { target { arm_thumb2_ok } } } } */ ++ return a ? 1 : 5; +} ++ --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/bics_1.c @@ -0,0 +1,54 @@ @@ -11706,6 +31460,81 @@ + +/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */ +/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/fixed_float_conversion.c ++++ b/src/gcc/testsuite/gcc.target/arm/fixed_float_conversion.c +@@ -3,6 +3,7 @@ + /* { dg-require-effective-target arm_vfp3_ok } */ + /* { dg-options "-O1" } */ + /* { dg-add-options arm_vfp3 } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ + + float + fixed_to_float (int i) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/flip-thumb.c +@@ -0,0 +1,26 @@ ++/* Check -mflip-thumb. */ ++/* { dg-do compile } */ ++/* Make sure the current multilib supports thumb. */ ++/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ ++/* { dg-options "-O2 -mflip-thumb -mno-restrict-it" } */ ++/* { dg-final { scan-assembler "\\.arm" } } */ ++/* { dg-final { scan-assembler-times "\\.thumb_func" 1} } */ ++ ++int ++foo(int a) ++{ ++ return a ? 1 : 5; ++} ++ ++int ++bar(int a) ++{ ++ return a ? 
1 : 5; ++} ++ ++/* { dg-final { scan-assembler-times "ite" 1 { target { arm_thumb2_ok } } } } */ ++ ++ ++ ++ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/maskdata.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options " -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++#define MASK 0xff00ff ++void maskdata (int * data, int len) ++{ ++ int i = len; ++ for (; i > 0; i -= 2) ++ { ++ data[i] &= MASK; ++ data[i + 1] &= MASK; ++ } ++} ++/* { dg-final { scan-assembler-not "65280" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/memset-inline-10.c ++++ b/src/gcc/testsuite/gcc.target/arm/memset-inline-10.c +@@ -1,5 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-march=armv7-a -mfloat-abi=hard -mfpu=neon -O2" } */ ++/* { dg-skip-if "need SIMD instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++/* { dg-skip-if "need SIMD instructions" { *-*-* } { "-mfpu=vfp*" } { "" } } */ + + #define BUF 100 + long a[BUF]; +--- a/src/gcc/testsuite/gcc.target/arm/neon-reload-class.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-reload-class.c +@@ -15,4 +15,4 @@ _op_blend_p_caa_dp(unsigned *s, unsigned* e, unsigned *d, unsigned c) { + + /* These constants should be emitted as immediates rather than loaded from memory. */ + +-/* { dg-final { scan-assembler-not "(\\.d?word|mov(w|t))" } } */ ++/* { dg-final { scan-assembler-not "(\\.d?word)" } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c @@ -58,18 +58,18 @@ GEN_COND_TESTS(vceq) @@ -11739,6 +31568,15 @@ /* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ /* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ /* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ +--- a/src/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c ++++ b/src/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c +@@ -1,5 +1,6 @@ + /* { dg-do compile } */ + /* { dg-require-effective-target arm_thumb2_ok } */ ++/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-march=*" "-mcpu=*" } { "-mcpu=cortex-m7" } } */ + /* { dg-options "-Os -mthumb -mcpu=cortex-m7" } */ + + int --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr26702.c @@ -0,0 +1,4 @@ @@ -11755,6 +31593,54 @@ -/* { dg-final { scan-assembler-times "ldr" 1 } } */ +/* { dg-final { scan-assembler-times "str" 1 } } */ --- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr43404.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++__attribute__ ((naked)) ++void __data_abort(void) ++{ ++ long foo; /* { dg-error "cannot allocate stack for variable" } */ ++ long* bar = &foo; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr48470.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++extern void g(int *x); ++ ++void __attribute__((naked)) f(void) ++{ ++ int x = 0; /* { dg-error "cannot allocate stack for variable" } */ ++ g(&x); ++} +--- a/src/gcc/testsuite/gcc.target/arm/pr58784.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr58784.c +@@ -1,6 +1,8 @@ + /* { dg-do compile } */ + /* { dg-skip-if "incompatible options" { arm_thumb1 } { "*" } { "" } } */ + /* { dg-options "-march=armv7-a -mfloat-abi=hard -mfpu=neon -marm -O2" } */ ++/* { dg-skip-if "need hardfp ABI" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++ + 
+ typedef struct __attribute__ ((__packed__)) + { +--- a/src/gcc/testsuite/gcc.target/arm/pr59985.C ++++ b/src/gcc/testsuite/gcc.target/arm/pr59985.C +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-skip-if "incompatible options" { arm_thumb1 } { "*" } { "" } } */ + /* { dg-options "-g -fcompare-debug -O2 -march=armv7-a -mtune=cortex-a9 -mfpu=vfpv3-d16 -mfloat-abi=hard" } */ ++/* { dg-skip-if "need hardfp abi" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ + + extern void *f1 (unsigned long, unsigned long); + extern const struct line_map *f2 (void *, int, unsigned int, const char *, unsigned int); +--- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr64208.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ @@ -11801,6 +31687,65 @@ +/* { dg-final { scan-rtl-dump "GLOBAL COPY-PROP" "cprop2" } } */ +/* { dg-final { cleanup-rtl-dump "cprop2" } } */ --- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr64744-1.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++__attribute__((naked)) ++void foo1 () ++{ ++ int aa = 0; ++ int ab = {0}; ++} ++ ++__attribute__((naked)) ++void foo2() { ++ char aa [ ] = {}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ab [1] = {}; ++ char ac [2] = {}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ad [3] = {}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++ ++__attribute__((naked)) ++void foo3() { ++ char aa [1] = {0}; ++ char ab [2] = {0}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ac [3] = {0}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ad [4] = {0}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++ ++__attribute__((naked)) ++void foo4() { ++ char aa [2] = {0,0}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++__attribute__((naked)) ++void foo5() { ++ char aa [3] = {0,0,0}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++ ++__attribute__((naked)) ++void foo6() { ++ char aa [4] = {0,0,0,0}; /* { dg-error "cannot allocate stack for variable" } */ ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr64744-2.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++struct s { ++ char a; ++ int b; ++}; ++ ++__attribute__((naked)) ++void foo () { ++ struct s x = {}; /* { dg-error "cannot allocate stack for variable" } */ ++} +--- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr64818.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ @@ -11841,6 +31786,15 @@ /* { dg-options "-mthumb -mcpu=cortex-m3 -O2" } */ struct tmp { +--- a/src/gcc/testsuite/gcc.target/arm/pr65647.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr65647.c +@@ -1,4 +1,6 @@ + /* { dg-do compile } */ ++/* { dg-require-effective-target arm_arch_v6m_ok } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } {"-mfloat-abi=soft" } } */ + /* { dg-options "-march=armv6-m -mthumb -O3 -w -mfloat-abi=soft" } */ + + a, b, c, e, g = &e, h, i = 7, l = 1, m, n, o, q = &m, r, s = &r, u, w = 9, x, --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr65710.c @@ -0,0 +1,120 @@ @@ -11989,6 +31943,145 @@ + if (b + a < 0) + c = 0; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pragma_attribute.c +@@ -0,0 +1,35 @@ ++/* Test for #prama target macros. 
*/ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_thumb1_ok } */ ++ ++#pragma GCC target ("thumb") ++ ++#ifndef __thumb__ ++#error "__thumb__ is not defined" ++#endif ++ ++#ifdef __thumb2__ ++#ifndef __ARM_32BIT_STATE ++#error "__ARM_32BIT_STATE is not defined" ++#endif ++#else /* thumb1 */ ++#ifdef __ARM_32BIT_STATE ++#error "__ARM_32BIT_STATE is defined" ++#endif ++#endif /* thumb1 */ ++ ++#pragma GCC target ("arm") ++ ++#ifdef __thumb__ ++#error "__thumb__ is defined" ++#endif ++ ++#if defined (__thumb2__) || defined (__thumb1__) ++#error "thumb is defined" ++#endif ++ ++#ifndef __ARM_32BIT_STATE ++#error "__ARM_32BIT_STATE is not defined" ++#endif ++ ++#pragma GCC reset_options +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/reg_equal_test.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -fdump-rtl-expand" } */ ++ ++extern void abort (void); ++unsigned int a = 1; ++ ++int ++main (void) ++{ ++ unsigned int b, c, d; ++ ++ if (sizeof (int) != 4 || (int) 0xc7d24b5e > 0) ++ return 0; ++ ++ c = 0xc7d24b5e; ++ d = a | -2; ++ b = (d == 0) ? c : (c % d); ++ if (b != c) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-rtl-dump "expr_list:REG_EQUAL \\(const_int -942519458" "expand" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/scd42-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/scd42-1.c +@@ -1,7 +1,8 @@ + /* Verify that mov is preferred on XScale for loading a 1 byte constant. */ + /* { dg-do compile } */ + /* { dg-skip-if "incompatible options" { arm*-*-* } { "-march=*" } { "" } } */ +-/* { dg-options "-mcpu=xscale -O" } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ ++/* { dg-options "-mcpu=xscale -O -mfloat-abi=softfp" } */ + + unsigned load1(void) __attribute__ ((naked)); + unsigned load1(void) +--- a/src/gcc/testsuite/gcc.target/arm/scd42-3.c ++++ b/src/gcc/testsuite/gcc.target/arm/scd42-3.c +@@ -2,7 +2,8 @@ + /* { dg-do compile } */ + /* { dg-skip-if "Test is specific to Xscale" { arm*-*-* } { "-march=*" } { "-march=xscale" } } */ + /* { dg-skip-if "Test is specific to Xscale" { arm*-*-* } { "-mcpu=*" } { "-mcpu=xscale" } } */ +-/* { dg-options "-mcpu=xscale -O" } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ ++/* { dg-options "-mcpu=xscale -O -mfloat-abi=softfp" } */ + + unsigned load4(void) __attribute__ ((naked)); + unsigned load4(void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/short-it-ifcvt-1.c +@@ -0,0 +1,23 @@ ++/* Test that ifcvt is not being too aggressive when -mrestrict-it. */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mrestrict-it" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++f1(int x, int y, int z) ++{ ++ if (x > 100) ++ { ++ x++; ++ z = -z; ++ } ++ else ++ { ++ x = -x; ++ y = -y; ++ z = 1; ++ } ++ return x + y + z; ++} ++ ++/* { dg-final { scan-assembler "b(gt|le)" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/short-it-ifcvt-2.c +@@ -0,0 +1,21 @@ ++/* Test that ifcvt is not being too aggressive when -mrestrict-it. 
*/ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mrestrict-it" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++f1(int x, int y, int z) ++{ ++ if (x > 100) ++ { ++ x++; ++ z = -z; ++ } ++ else ++ { ++ x = -x; ++ y = -y; ++ } ++ return x + y + z; ++} ++/* { dg-final { scan-assembler "b(gt|le)" } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp +++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp @@ -27,9 +27,22 @@ load_lib gcc-dg.exp @@ -13152,6 +33245,17 @@ /* { dg-options "-save-temps -O1 -fno-inline" } */ /* { dg-add-options arm_neon } */ +--- a/src/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c +@@ -1,7 +1,7 @@ + /* Check that Thumb 16-bit shifts can be if-converted. */ + /* { dg-do compile } */ + /* { dg-require-effective-target arm_thumb2_ok } */ +-/* { dg-options "-O2 -mthumb" } */ ++/* { dg-options "-O2 -mthumb -mno-restrict-it" } */ + + int + foo (int a, int b) --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/unsigned-float.c @@ -0,0 +1,20 @@ @@ -13175,6 +33279,47 @@ +} + +/* { dg-final { scan-assembler-not "vcvt.(f32.f64|f64.f32)" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */ ++/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp -ffp-contract=off" } */ + /* { dg-require-effective-target arm_vfp_ok } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ + + extern float fabsf (float); + extern float sqrtf (float); +--- a/src/gcc/testsuite/lib/target-supports.exp ++++ b/src/gcc/testsuite/lib/target-supports.exp +@@ -4675,6 +4675,27 @@ proc check_effective_target_vect_call_copysignf { } { + return $et_vect_call_copysignf_saved + } + ++# Return 1 if the target supports hardware square root instructions. ++ ++proc check_effective_target_sqrt_insn { } { ++ global et_sqrt_insn_saved ++ ++ if [info exists et_sqrt_insn_saved] { ++ verbose "check_effective_target_hw_sqrt: using cached result" 2 ++ } else { ++ set et_sqrt_insn_saved 0 ++ if { [istarget x86_64-*-*] ++ || [istarget powerpc*-*-*] ++ || [istarget aarch64*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_vfp_ok]) } { ++ set et_sqrt_insn_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_hw_sqrt: returning et_sqrt_insn_saved" 2 ++ return $et_sqrt_insn_saved ++} ++ + # Return 1 if the target supports vector sqrtf calls. + + proc check_effective_target_vect_call_sqrtf { } { --- a/src/gcc/tree-ssa-loop-ivopts.c +++ b/src/gcc/tree-ssa-loop-ivopts.c @@ -226,6 +226,7 @@ struct cost_pair @@ -14232,9 +34377,239 @@ /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into powi(x, n/3) * powi(cbrt(x), n%3), n > 0; +--- a/src/gcc/tree-ssa-phiopt.c ++++ b/src/gcc/tree-ssa-phiopt.c +@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see + static unsigned int tree_ssa_phiopt_worker (bool, bool); + static bool conditional_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); ++static bool factor_out_conditional_conversion (edge, edge, gphi *, tree, tree); + static int value_replacement (basic_block, basic_block, + edge, edge, gimple, tree, tree); + static bool minmax_replacement (basic_block, basic_block, +@@ -356,6 +357,19 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) + node. 
*/ + gcc_assert (arg0 != NULL && arg1 != NULL); + ++ if (factor_out_conditional_conversion (e1, e2, phi, arg0, arg1)) ++ { ++ /* factor_out_conditional_conversion may create a new PHI in ++ BB2 and eliminate an existing PHI in BB2. Recompute values ++ that may be affected by that change. */ ++ phis = phi_nodes (bb2); ++ phi = single_non_singleton_phi_for_edges (phis, e1, e2); ++ gcc_assert (phi); ++ arg0 = gimple_phi_arg_def (phi, e1->dest_idx); ++ arg1 = gimple_phi_arg_def (phi, e2->dest_idx); ++ gcc_assert (arg0 != NULL && arg1 != NULL); ++ } ++ + /* Do the replacement of conditional if it can be done. */ + if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -431,6 +445,134 @@ replace_phi_edge_with_variable (basic_block cond_block, + bb->index); + } + ++/* PR66726: Factor conversion out of COND_EXPR. If the arguments of the PHI ++ stmt are CONVERT_STMT, factor out the conversion and perform the conversion ++ to the result of PHI stmt. */ ++ ++static bool ++factor_out_conditional_conversion (edge e0, edge e1, gphi *phi, ++ tree arg0, tree arg1) ++{ ++ gimple arg0_def_stmt = NULL, arg1_def_stmt = NULL, new_stmt; ++ tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE; ++ tree temp, result; ++ gphi *newphi; ++ gimple_stmt_iterator gsi, gsi_for_def; ++ source_location locus = gimple_location (phi); ++ enum tree_code convert_code; ++ ++ /* Handle only PHI statements with two arguments. TODO: If all ++ other arguments to PHI are INTEGER_CST or if their defining ++ statement have the same unary operation, we can handle more ++ than two arguments too. */ ++ if (gimple_phi_num_args (phi) != 2) ++ return false; ++ ++ /* First canonicalize to simplify tests. */ ++ if (TREE_CODE (arg0) != SSA_NAME) ++ { ++ std::swap (arg0, arg1); ++ std::swap (e0, e1); ++ } ++ ++ if (TREE_CODE (arg0) != SSA_NAME ++ || (TREE_CODE (arg1) != SSA_NAME ++ && TREE_CODE (arg1) != INTEGER_CST)) ++ return false; ++ ++ /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is ++ a conversion. */ ++ arg0_def_stmt = SSA_NAME_DEF_STMT (arg0); ++ if (!is_gimple_assign (arg0_def_stmt) ++ || !gimple_assign_cast_p (arg0_def_stmt)) ++ return false; ++ ++ /* Use the RHS as new_arg0. */ ++ convert_code = gimple_assign_rhs_code (arg0_def_stmt); ++ new_arg0 = gimple_assign_rhs1 (arg0_def_stmt); ++ if (convert_code == VIEW_CONVERT_EXPR) ++ new_arg0 = TREE_OPERAND (new_arg0, 0); ++ ++ if (TREE_CODE (arg1) == SSA_NAME) ++ { ++ /* Check if arg1 is an SSA_NAME and the stmt which defines arg1 ++ is a conversion. */ ++ arg1_def_stmt = SSA_NAME_DEF_STMT (arg1); ++ if (!is_gimple_assign (arg1_def_stmt) ++ || gimple_assign_rhs_code (arg1_def_stmt) != convert_code) ++ return false; ++ ++ /* Use the RHS as new_arg1. */ ++ new_arg1 = gimple_assign_rhs1 (arg1_def_stmt); ++ if (convert_code == VIEW_CONVERT_EXPR) ++ new_arg1 = TREE_OPERAND (new_arg1, 0); ++ } ++ else ++ { ++ /* If arg1 is an INTEGER_CST, fold it to new type. */ ++ if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0)) ++ && int_fits_type_p (arg1, TREE_TYPE (new_arg0))) ++ { ++ if (gimple_assign_cast_p (arg0_def_stmt)) ++ new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1); ++ else ++ return false; ++ } ++ else ++ return false; ++ } ++ ++ /* If arg0/arg1 have > 1 use, then this transformation actually increases ++ the number of expressions evaluated at runtime. */ ++ if (!has_single_use (arg0) ++ || (arg1_def_stmt && !has_single_use (arg1))) ++ return false; ++ ++ /* If types of new_arg0 and new_arg1 are different bailout. 
*/ ++ if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1))) ++ return false; ++ ++ /* Create a new PHI stmt. */ ++ result = PHI_RESULT (phi); ++ temp = make_ssa_name (TREE_TYPE (new_arg0), NULL); ++ newphi = create_phi_node (temp, gimple_bb (phi)); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "PHI "); ++ print_generic_expr (dump_file, gimple_phi_result (phi), 0); ++ fprintf (dump_file, ++ " changed to factor conversion out from COND_EXPR.\n"); ++ fprintf (dump_file, "New stmt with CAST that defines "); ++ print_generic_expr (dump_file, result, 0); ++ fprintf (dump_file, ".\n"); ++ } ++ ++ /* Remove the old cast(s) that has single use. */ ++ gsi_for_def = gsi_for_stmt (arg0_def_stmt); ++ gsi_remove (&gsi_for_def, true); ++ if (arg1_def_stmt) ++ { ++ gsi_for_def = gsi_for_stmt (arg1_def_stmt); ++ gsi_remove (&gsi_for_def, true); ++ } ++ ++ add_phi_arg (newphi, new_arg0, e0, locus); ++ add_phi_arg (newphi, new_arg1, e1, locus); ++ ++ /* Create the conversion stmt and insert it. */ ++ if (convert_code == VIEW_CONVERT_EXPR) ++ temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp); ++ new_stmt = gimple_build_assign (result, convert_code, temp); ++ gsi = gsi_after_labels (gimple_bb (phi)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ ++ /* Remove he original PHI stmt. */ ++ gsi = gsi_for_stmt (phi); ++ gsi_remove (&gsi, true); ++ return true; ++} ++ + /* The function conditional_replacement does the main work of doing the + conditional replacement. Return true if the replacement is done. + Otherwise return false. +@@ -2173,6 +2315,26 @@ gate_hoist_loads (void) + This pass also performs a fifth transformation of a slightly different + flavor. + ++ Factor conversion in COND_EXPR ++ ------------------------------ ++ ++ This transformation factors the conversion out of COND_EXPR with ++ factor_out_conditional_conversion. ++ ++ For example: ++ if (a <= CST) goto ; else goto ; ++ : ++ tmp = (int) a; ++ : ++ tmp = PHI ++ ++ Into: ++ if (a <= CST) goto ; else goto ; ++ : ++ : ++ a = PHI ++ tmp = (int) a; ++ + Adjacent Load Hoisting + ---------------------- + +--- a/src/gcc/tree-vrp.c ++++ b/src/gcc/tree-vrp.c +@@ -3145,14 +3145,33 @@ extract_range_from_binary_expr_1 (value_range_t *vr, + and all numbers from min to 0 for negative min. */ + cmp = compare_values (vr0.max, zero); + if (cmp == -1) +- max = zero; ++ { ++ /* When vr0.max < 0, vr1.min != 0 and value ++ ranges for dividend and divisor are available. */ ++ if (vr1.type == VR_RANGE ++ && !symbolic_range_p (&vr0) ++ && !symbolic_range_p (&vr1) ++ && !compare_values (vr1.min, zero)) ++ max = int_const_binop (code, vr0.max, vr1.min); ++ else ++ max = zero; ++ } + else if (cmp == 0 || cmp == 1) + max = vr0.max; + else + type = VR_VARYING; + cmp = compare_values (vr0.min, zero); + if (cmp == 1) +- min = zero; ++ { ++ /* For unsigned division when value ranges for dividend ++ and divisor are available. 
*/ ++ if (vr1.type == VR_RANGE ++ && !symbolic_range_p (&vr0) ++ && !symbolic_range_p (&vr1)) ++ min = int_const_binop (code, vr0.min, vr1.max); ++ else ++ min = zero; ++ } + else if (cmp == 0 || cmp == -1) + min = vr0.min; + else --- a/src/libgcc/config.host +++ b/src/libgcc/config.host -@@ -377,14 +377,15 @@ arm*-*-netbsdelf*) +@@ -382,14 +382,15 @@ arm*-*-netbsdelf*) tmake_file="$tmake_file arm/t-arm arm/t-netbsd t-slibgcc-gld-nover" ;; arm*-*-linux*) # ARM GNU/Linux with ELF @@ -14252,7 +34627,7 @@ tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" tmake_file="${tmake_file} arm/t-bpabi" tm_file="$tm_file arm/bpabi-lib.h" -@@ -396,7 +397,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems*) +@@ -401,7 +402,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems*) tm_file="$tm_file arm/bpabi-lib.h" case ${host} in arm*-*-eabi* | arm*-*-rtems*) diff -u gcc-5-5.2.1/debian/patches/gdc-5-doc.diff gcc-5-5.2.1/debian/patches/gdc-5-doc.diff --- gcc-5-5.2.1/debian/patches/gdc-5-doc.diff +++ gcc-5-5.2.1/debian/patches/gdc-5-doc.diff @@ -22,25 +22,6 @@ The abbreviation @dfn{GCC} has multiple meanings in common use. The current official meaning is ``GNU Compiler Collection'', which refers -Index: b/src/gcc/doc/install.texi -=================================================================== ---- a/src/gcc/doc/install.texi -+++ b/src/gcc/doc/install.texi -@@ -1547,12 +1547,12 @@ their runtime libraries should be built. - grep language= */config-lang.in - @end smallexample - Currently, you can use any of the following: --@code{all}, @code{ada}, @code{c}, @code{c++}, @code{fortran}, -+@code{all}, @code{ada}, @code{c}, @code{c++}, @code{d}, @code{fortran}, - @code{go}, @code{java}, @code{objc}, @code{obj-c++}. - Building the Ada compiler has special requirements, see below. - If you do not pass this flag, or specify the option @code{all}, then all - default languages available in the @file{gcc} sub-tree will be configured. --Ada, Go and Objective-C++ are not default languages; the rest are. -+Ada, D, Go and Objective-C++ are not default languages; the rest are. - - @item --enable-stage1-languages=@var{lang1},@var{lang2},@dots{} - Specify that a particular subset of compilers and their runtime Index: b/src/gcc/doc/invoke.texi =================================================================== --- a/src/gcc/doc/invoke.texi diff -u gcc-5-5.2.1/debian/patches/pr67736.diff gcc-5-5.2.1/debian/patches/pr67736.diff --- gcc-5-5.2.1/debian/patches/pr67736.diff +++ gcc-5-5.2.1/debian/patches/pr67736.diff @@ -1,15 +1,13 @@ -2012-05-06 Andrew Pinski - - * combine.c (simplify_comparison): Use gen_lowpart_or_truncate instead - of gen_lowpart when we had a truncating and. +2015-10-23 Steve Ellcey + Andrew Pinski - * gcc.c-torture/execute/20110418-1.c: New testcase. + PR rtl-optimization/67736 + * combine.c (simplify_comparison): Use gen_lowpart_or_truncate instead + of gen_lowpart. 
-Index: a/src/gcc/combine.c -=================================================================== ---- a/src/gcc/combine.c (revision 187203) -+++ a/src/gcc/combine.c (working copy) -@@ -11199,8 +11199,8 @@ simplify_comparison (enum rtx_code code, +--- a/src/gcc/combine.c ++++ a/src/gcc/combine.c +@@ -11530,8 +11530,8 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1) tmode != GET_MODE (op0); tmode = GET_MODE_WIDER_MODE (tmode)) if ((unsigned HOST_WIDE_INT) c0 == GET_MODE_MASK (tmode)) { @@ -23,34 +21,15 @@ -Index: a/src/gcc/testsuite/gcc.c-torture/execute/20110418-1.c -=================================================================== ---- a/src/gcc/testsuite/gcc.c-torture/execute/20110418-1.c (revision 0) -+++ a/src/gcc/testsuite/gcc.c-torture/execute/20110418-1.c (revision 0) -@@ -0,0 +1,29 @@ -+typedef unsigned long long uint64_t; -+void f(uint64_t *a, uint64_t aa) __attribute__((noinline)); -+void f(uint64_t *a, uint64_t aa) -+{ -+ uint64_t new_value = aa; -+ uint64_t old_value = *a; -+ int bit_size = 32; -+ uint64_t mask = (uint64_t)(unsigned)(-1); -+ uint64_t tmp = old_value & mask; -+ new_value &= mask; -+ /* On overflow we need to add 1 in the upper bits */ -+ if (tmp > new_value) -+ new_value += 1ull<= 0 + && const_op >> i == 0 +- && (tmode = mode_for_size (i, MODE_INT, 1)) != BLKmode +- && (TRULY_NOOP_TRUNCATION_MODES_P (tmode, GET_MODE (op0)) +- || (REG_P (XEXP (op0, 0)) +- && reg_truncated_to_mode (tmode, XEXP (op0, 0))))) ++ && (tmode = mode_for_size (i, MODE_INT, 1)) != BLKmode) + { +- op0 = gen_lowpart (tmode, XEXP (op0, 0)); ++ op0 = gen_lowpart_or_truncate (tmode, XEXP (op0, 0)); + continue; + } + diff -u gcc-5-5.2.1/debian/patches/svn-updates.diff gcc-5-5.2.1/debian/patches/svn-updates.diff --- gcc-5-5.2.1/debian/patches/svn-updates.diff +++ gcc-5-5.2.1/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 5 branch upto 20151021 (r229103). +# DP: updates from the 5 branch upto 20151028 (r229478). last_update() { cat > ${dir}LAST_UPDATED ++ ++ Backport from mainline ++ 2015-10-26 Kyrylo Tkachov ++ ++ PR middle-end/67989 ++ * optabs.c (expand_atomic_compare_and_swap): Handle case when ++ ptarget_oval or ptarget_bool are const0_rtx. ++ ++2015-10-27 Kyrylo Tkachov ++ ++ PR target/67929 ++ * config/arm/arm.c (vfp3_const_double_for_bits): Rewrite. ++ * config/arm/constraints.md (Dp): Update callsite. ++ * config/arm/predicates.md (const_double_vcvt_power_of_two): Likewise. ++ ++2015-10-27 Martin Jambor ++ ++ * tree-sra.c (replace_removed_params_ssa_names): Change type of ++ parameter stmt to gimple. ++ ++2015-10-26 Martin Jambor ++ ++ Backport from mainline ++ 2015-10-09 Martin Jambor ++ ++ PR tree-optimization/67794 ++ * tree-sra.c (replace_removed_params_ssa_names): Do not distinguish ++ between types of statements but accept original definitions as a ++ parameter. ++ (ipa_sra_modify_function_body): Use FOR_EACH_SSA_DEF_OPERAND to ++ iterate over definitions. ++ ++2015-10-25 John David Anglin ++ ++ PR middle-end/68079 ++ * dojump.c (do_compare_and_jump): Canonicalize both function and ++ method types. ++ ++2015-10-22 Andreas Krebbel ++ ++ Backport from mainline ++ 2015-10-22 Andreas Krebbel ++ ++ PR target/68015 ++ * config/s390/s390.md (movcc): Emit compare only if we don't ++ already have a comparison result. 
++ ++2015-10-22 Uros Bizjak ++ ++ Backport from mainline ++ 2015-10-21 Uros Bizjak ++ ++ PR target/68018 ++ * config/i386/i386.c (ix86_compute_frame_layout): Realign the stack ++ for 64-bit MS_ABI targets also when default incoming stack boundary ++ is overriden. ++ +2015-10-20 Szabolcs Nagy + + Backport from mainline r229024 @@ -17343,7 +17453,7 @@ + Backport from mainline + 2015-08-21 Dominik Vogt + -+ * config/s390/s390-builtins.def: Fix value range of vec_load_bndry. ++ * config/s390/s390-builtins.def: Fix value range of vec_load_bndry. + +2015-08-24 Michael Meissner + @@ -17872,7 +17982,7 @@ 2015-07-16 Release Manager * GCC 5.2.0 released. -@@ -119,8 +1389,8 @@ +@@ -119,8 +1453,8 @@ 2015-07-09 Iain Sandoe PR target/66523 @@ -17883,7 +17993,7 @@ 2015-07-06 Alan Lawrence -@@ -181,21 +1451,8 @@ +@@ -181,21 +1515,8 @@ * doc/invoke.texi (i386 and x86-64 Options): -mno-fancy-math-387 is not actually the default on FreeBSD. @@ -18112,6 +18222,33 @@ + /* { dg-final { scan-assembler-not "vnmul\\.f32" } } */ + return -a * b; +} +Index: gcc/testsuite/gcc.target/arm/pr67929_1.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/arm/pr67929_1.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.target/arm/pr67929_1.c (.../branches/gcc-5-branch) +@@ -0,0 +1,21 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_vfp3_ok } */ ++/* { dg-options "-O2 -fno-inline" } */ ++/* { dg-add-options arm_vfp3 } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++ ++int ++foo (float a) ++{ ++ return a * 4.9f; ++} ++ ++ ++int ++main (void) ++{ ++ if (foo (10.0f) != 49) ++ __builtin_abort (); ++ ++ return 0; ++} +\ No newline at end of file Index: gcc/testsuite/gcc.target/arm/pr66912.c =================================================================== --- a/src/gcc/testsuite/gcc.target/arm/pr66912.c (.../tags/gcc_5_2_0_release) @@ -19342,6 +19479,21 @@ /* { dg-options "-std=gnu99 -mpreferred-stack-boundary=4" } */ int outer_function (int x, int y) +Index: gcc/testsuite/gcc.target/i386/pr68018.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/pr68018.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.target/i386/pr68018.c (.../branches/gcc-5-branch) +@@ -0,0 +1,10 @@ ++/* { dg-do compile { target { *-*-linux* && { ! 
ia32 } } } } */ ++/* { dg-options "-O -mabi=ms -mstackrealign" } */ ++ ++typedef float V __attribute__ ((vector_size (16))); ++ ++int fn1 (V * x) ++{ ++ V a = *x; ++ return a[0]; ++} Index: gcc/testsuite/gcc.target/i386/pr66814.c =================================================================== --- a/src/gcc/testsuite/gcc.target/i386/pr66814.c (.../tags/gcc_5_2_0_release) @@ -19524,6 +19676,35 @@ + *d = 1; + goto *a; +} +Index: gcc/testsuite/gcc.target/s390/pr68015.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/s390/pr68015.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.target/s390/pr68015.c (.../branches/gcc-5-branch) +@@ -0,0 +1,24 @@ ++/* { dg-compile } */ ++/* { dg-options "-O2 -march=z196" } */ ++ ++extern long useme (long, ...); ++ ++void ++foo (void) ++{ ++ long secs = useme (41); ++ long utc_secs = useme (42); ++ long h, m; ++ ++ utc_secs = useme (42); ++ h = secs / 3600; ++ m = secs / 60; ++ if (utc_secs >= 86400) ++ { ++ m = 59; ++ h--; ++ if (h < 0) ++ h = 23; ++ } ++ useme (h, m); ++} Index: gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c =================================================================== --- a/src/gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c (.../tags/gcc_5_2_0_release) @@ -20025,6 +20206,35 @@ + type(t), parameter :: vec(*) = [(t(i), i = 1, 4)] + type(t), parameter :: arr(*) = reshape(vec, [2, 2]) ! { dg-error "ranks 1 and 2 in assignment" } +end +Index: gcc/testsuite/gfortran.dg/pr58754.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/pr58754.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/pr58754.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,24 @@ ++! { dg-do compile } ++! ++! Tests the fix for PR58754 ++! ++ type :: char_type ++ character, allocatable :: chr (:) ++ end type ++ character, allocatable :: c(:) ++ type(char_type) :: d ++ character :: t(1) = ["w"] ++ ++ allocate (c (1), source = t) ++ if (any (c .ne. t)) call abort ++ c = ["a"] ++ if (any (c .ne. ["a"])) call abort ++ deallocate (c) ++ ++! Check allocatable character components, whilst we are about it. ++ allocate (d%chr (2), source = [t, char (ichar (t) + 1)]) ++ if (any (d%chr .ne. ["w", "x"])) call abort ++ d%chr = ["a","b","c","d"] ++ if (any (d%chr .ne. ["a","b","c","d"])) call abort ++ deallocate (d%chr) ++end Index: gcc/testsuite/gfortran.dg/pr56520.f90 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/pr56520.f90 (.../tags/gcc_5_2_0_release) @@ -20386,6 +20596,55 @@ + data x /t()/ ! Prior to patch, this would ICE. + end block +end +Index: gcc/testsuite/gfortran.dg/move_alloc_16.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/move_alloc_16.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/move_alloc_16.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,44 @@ ++! { dg-do run } ++! ++! Tests the fix for PR67177 in which MOVE_ALLOC was not assigning the string ++! length for deferred length characters. ++! ++! Contributed by ++! 
++program str ++ implicit none ++ ++ type string ++ character(:), Allocatable :: text ++ end type string ++ ++ type strings ++ type(string), allocatable, dimension(:) :: strlist ++ end type strings ++ ++ type(strings) :: teststrs ++ type(string) :: tmpstr ++ integer :: strlen = 20 ++ ++ allocate (teststrs%strlist(1)) ++ allocate (character(len=strlen) :: tmpstr%text) ++ ++ allocate (character(len=strlen) :: teststrs%strlist(1)%text) ++ ++! Full string reference was required because reallocation on assignment is ++! functioning when it should not if the lhs is a substring - PR67977 ++ tmpstr%text(1:3) = 'foo' ++ ++ if (.not.allocated (teststrs%strlist(1)%text)) call abort ++ if (len (tmpstr%text) .ne. strlen) call abort ++ ++ call move_alloc(tmpstr%text,teststrs%strlist(1)%text) ++ ++ if (.not.allocated (teststrs%strlist(1)%text)) call abort ++ if (len (teststrs%strlist(1)%text) .ne. strlen) call abort ++ if (trim (teststrs%strlist(1)%text(1:3)) .ne. 'foo') call abort ++ ++! Clean up so that valgrind reports all allocated memory freed. ++ if (allocated (teststrs%strlist(1)%text)) deallocate (teststrs%strlist(1)%text) ++ if (allocated (teststrs%strlist)) deallocate (teststrs%strlist) ++end program str Index: gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_1.f03 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_1.f03 (.../tags/gcc_5_2_0_release) @@ -20661,6 +20920,33 @@ + +end program alloc_comp_copy_test + +Index: gcc/testsuite/gfortran.dg/deferred_character_assignment_1.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/deferred_character_assignment_1.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/deferred_character_assignment_1.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,22 @@ ++! { dg-do run } ++! ++! Checks the fix for PR67977 in which automatic reallocation on assignment ++! was performed when the lhs had a substring reference. ++! ++! Contributed by Anton Shterenlikht ++! ++ character(:), allocatable :: z ++ integer :: length ++ z = "cockatoo" ++ length = len (z) ++ z(:) = '' ++ if (len(z) .ne. length) call abort ++ if (trim (z) .ne. '') call abort ++ z(:3) = "foo" ++ if (len(z) .ne. length) call abort ++ if (trim (z) .ne. "foo") call abort ++ z(4:) = "__bar" ++ if (len(z) .ne. length) call abort ++ if (trim (z) .ne. "foo__bar") call abort ++ deallocate (z) ++end Index: gcc/testsuite/gfortran.dg/coarray_collectives_16.f90 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/coarray_collectives_16.f90 (.../tags/gcc_5_2_0_release) @@ -20775,6 +21061,99 @@ + end select + end subroutine +end program +Index: gcc/testsuite/gfortran.dg/move_alloc_15.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/move_alloc_15.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/move_alloc_15.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,88 @@ ++! { dg-do run } ++! { dg-options "-fdump-tree-original" } ++! ++! Fix for PR...... ++! ++! The 'to' components of 'mytemp' would remain allocated after the call to ++! MOVE_ALLOC, resulting in memory leaks. ++! ++! Contributed by Alberto Luaces. ++! ++! See https://groups.google.com/forum/#!topic/comp.lang.fortran/k3bkKUbOpFU ++! 
++module alloctest ++ type myallocatable ++ integer, allocatable:: i(:) ++ end type myallocatable ++ ++contains ++ subroutine f(num, array) ++ implicit none ++ integer, intent(in) :: num ++ integer :: i ++ type(myallocatable):: array(:) ++ ++ do i = 1, num ++ allocate(array(i)%i(5), source = [1,2,3,4,5]) ++ end do ++ ++ end subroutine f ++end module alloctest ++ ++program name ++ use alloctest ++ implicit none ++ type(myallocatable), allocatable:: myarray(:), mytemp(:) ++ integer, parameter:: OLDSIZE = 7, NEWSIZE = 20 ++ logical :: flag ++ ++ allocate(myarray(OLDSIZE)) ++ call f(size(myarray), myarray) ++ ++ allocate(mytemp(NEWSIZE)) ++ mytemp(1:OLDSIZE) = myarray ++ ++ flag = .false. ++ call foo ++ call bar ++ ++ deallocate(myarray) ++ if (allocated (mytemp)) deallocate (mytemp) ++ ++ allocate(myarray(OLDSIZE)) ++ call f(size(myarray), myarray) ++ ++ allocate(mytemp(NEWSIZE)) ++ mytemp(1:OLDSIZE) = myarray ++ ++! Verfify that there is no segfault if the allocatable components ++! are deallocated before the call to move_alloc ++ flag = .true. ++ call foo ++ call bar ++ ++ deallocate(myarray) ++contains ++ subroutine foo ++ integer :: i ++ if (flag) then ++ do i = 1, OLDSIZE ++ deallocate (mytemp(i)%i) ++ end do ++ end if ++ call move_alloc(mytemp, myarray) ++ end subroutine ++ ++ subroutine bar ++ integer :: i ++ do i = 1, OLDSIZE ++ if (.not.flag .and. allocated (myarray(i)%i)) then ++ if (any (myarray(i)%i .ne. [1,2,3,4,5])) call abort ++ else ++ if (.not.flag) call abort ++ end if ++ end do ++ end subroutine ++end program name ++! { dg-final { scan-tree-dump-times "__builtin_malloc" 11 "original" } } ++! { dg-final { scan-tree-dump-times "__builtin_free" 11 "original" } } Index: gcc/testsuite/gfortran.dg/char_length_2.f90 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/char_length_2.f90 (.../tags/gcc_5_2_0_release) @@ -21711,6 +22090,26 @@ + asm volatile ("" : : "g" (b) : "memory"); + return 0; +} +Index: gcc/testsuite/gcc.dg/torture/pr67794.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/torture/pr67794.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.dg/torture/pr67794.c (.../branches/gcc-5-branch) +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++int *b; ++static void fn1(int *best, int *dmin) { ++ int a[64]; ++ dmin = a; ++ __asm__ volatile("" : "+&r"(dmin) : ""(best)); ++} ++ ++__attribute__((always_inline)) static inline void fn2(int *best) { fn1(best, b); } ++ ++void fn3(void) { ++ int c[1]; ++ fn2(c); ++} Index: gcc/testsuite/gcc.dg/torture/pr67442.c =================================================================== --- a/src/gcc/testsuite/gcc.dg/torture/pr67442.c (.../tags/gcc_5_2_0_release) @@ -22029,6 +22428,45 @@ + fn1 (**f); + return 0; +} +Index: gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c (.../branches/gcc-5-branch) +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fipa-sra -fdump-tree-eipa_sra-details" } */ ++ ++extern void consume (int); ++extern int glob, glob1, glob2; ++extern int get (void); ++ ++ ++static void __attribute__ ((noinline)) ++foo (int a) ++{ ++ a = glob; ++ consume (a); ++ a = get (); ++ consume (a); ++ __asm__ volatile("" : : ""(a)); ++ consume (a); ++ ++ if (glob1) ++ a = glob1; ++ else ++ a = glob2; ++ consume (a); ++} ++ 
++int ++bar (int a) ++{ ++ foo (a); ++ glob = a; ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "replacing an SSA name of a removed param" 4 "eipa_sra" } } */ Index: gcc/testsuite/gcc.dg/pr67028.c =================================================================== --- a/src/gcc/testsuite/gcc.dg/pr67028.c (.../tags/gcc_5_2_0_release) @@ -22059,7 +22497,59 @@ =================================================================== --- a/src/gcc/testsuite/ChangeLog (.../tags/gcc_5_2_0_release) +++ b/src/gcc/testsuite/ChangeLog (.../branches/gcc-5-branch) -@@ -1,3 +1,797 @@ +@@ -1,3 +1,849 @@ ++2015-10-27 Kyrylo Tkachov ++ ++ Backport from mainline ++ 2015-10-26 Kyrylo Tkachov ++ ++ PR middle-end/67989 ++ * g++.dg/pr67989.C: New test. ++ ++2015-10-27 Kyrylo Tkachov ++ ++ PR target/67929 ++ * gcc.target/arm/pr67929_1.c: New test. ++ ++2015-10-26 Paul Thomas ++ ++ Backport from mainline r228940: ++ PR fortran/67177 ++ * gfortran.dg/move_alloc_15.f90: New test ++ * gfortran.dg/move_alloc_16.f90: New test ++ ++ PR fortran/67977 ++ * gfortran.dg/deferred_character_assignment_1.f90: New test ++ ++2015-10-26 Martin Jambor ++ ++ Backport from mainline ++ 2015-10-09 Martin Jambor ++ ++ * gcc.dg/ipa/ipa-sra-10.c: New test. ++ * gcc.dg/torture/pr67794.c: Likewise. ++ ++2015-10-22 Paul Thomas ++ ++ PR fortran/58754 ++ * gfortran.dg/pr58754.f90: New test ++ ++2015-10-22 Andreas Krebbel ++ ++ Backport from mainline ++ 2015-10-22 Andreas Krebbel ++ ++ PR target/68015 ++ * gcc.target/s390/pr68015.c: New test. ++ ++2015-10-22 Uros Bizjak ++ ++ Backport from mainline ++ 2015-10-21 Uros Bizjak ++ ++ PR target/68018 ++ * gcc.target/i386/pr68018.c: New test. ++ +2015-10-20 Szabolcs Nagy + + Backport from mainline r229024 @@ -22272,8 +22762,8 @@ + +2015-04-28 Kirill Yukhin + -+ * gcc.target/i386/avx512vbmi-check.h (main): Fix register -+ name while checking for AVX-512VBMI presence. ++ * gcc.target/i386/avx512vbmi-check.h (main): Fix register ++ name while checking for AVX-512VBMI presence. + +2015-09-25 Steven G. Kargl + @@ -22485,7 +22975,7 @@ + Backport from mainline + 2015-08-21 Dominik Vogt + -+ * gcc.target/s390/zvector/vec-load_bndry-1.c: New test. ++ * gcc.target/s390/zvector/vec-load_bndry-1.c: New test. + +2015-08-24 Michael Meissner + @@ -22622,9 +23112,9 @@ + backport from trunk: + 2015-07-21 Alex Velenko + -+ * gcc.target/arm/split-live-ranges-for-shrink-wrap.c (dg-skip-if): ++ * gcc.target/arm/split-live-ranges-for-shrink-wrap.c (dg-skip-if): + Skip -march=armv4t. -+ (dg-additional-options): Set armv5t flag. ++ (dg-additional-options): Set armv5t flag. + +2015-07-25 Tom de Vries + @@ -22857,7 +23347,7 @@ 2015-07-16 Release Manager * GCC 5.2.0 released. -@@ -792,7 +1586,7 @@ +@@ -792,7 +1638,7 @@ Add missing ChangeLog entry for r222341. 
Backport from trunk r222273 @@ -23062,6 +23552,38 @@ +int main() { + static_assert(A(42), ""); +} +Index: gcc/testsuite/g++.dg/cpp0x/anon-union1.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/cpp0x/anon-union1.C (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/g++.dg/cpp0x/anon-union1.C (.../branches/gcc-5-branch) +@@ -0,0 +1,27 @@ ++// PR c++/66583 ++// { dg-do run { target c++11 } } ++ ++template ++T&& move(T& t) { return static_cast(t); } ++ ++struct A { ++ A() { }; ++ A(const A&) { } ++}; ++ ++struct B { ++ union { ++ int m_1 = 0; ++ int m_2; ++ }; ++ A dummy; ++}; ++ ++int main() ++{ ++ B b; ++ b.m_1 = 1; ++ B c = move(b); ++ if (c.m_1 != 1) ++ __builtin_abort(); ++} Index: gcc/testsuite/g++.dg/cpp0x/alignas2.C =================================================================== --- a/src/gcc/testsuite/g++.dg/cpp0x/alignas2.C (.../tags/gcc_5_2_0_release) @@ -23136,6 +23658,86 @@ +void *operator new[](std::size_t, std::nothrow_t &) noexcept { + return vespamalloc::_GmemP->malloc(0); +} +Index: gcc/testsuite/g++.dg/pr67989.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/pr67989.C (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/g++.dg/pr67989.C (.../branches/gcc-5-branch) +@@ -0,0 +1,75 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c++11 -O2" } */ ++/* { dg-additional-options "-marm -march=armv4t" { target arm*-*-* } } */ ++ ++__extension__ typedef unsigned long long int uint64_t; ++namespace std __attribute__ ((__visibility__ ("default"))) ++{ ++ typedef enum memory_order ++ { ++ memory_order_seq_cst ++ } memory_order; ++} ++ ++namespace std __attribute__ ((__visibility__ ("default"))) ++{ ++ template < typename _Tp > struct atomic ++ { ++ static constexpr int _S_min_alignment ++ = (sizeof (_Tp) & (sizeof (_Tp) - 1)) || sizeof (_Tp) > 16 ++ ? 0 : sizeof (_Tp); ++ static constexpr int _S_alignment ++ = _S_min_alignment > alignof (_Tp) ? 
_S_min_alignment : alignof (_Tp); ++ alignas (_S_alignment) _Tp _M_i; ++ operator _Tp () const noexcept ++ { ++ return load (); ++ } ++ _Tp load (memory_order __m = memory_order_seq_cst) const noexcept ++ { ++ _Tp tmp; ++ __atomic_load (&_M_i, &tmp, __m); ++ } ++ }; ++} ++ ++namespace lldb_private ++{ ++ namespace imp ++ { ++ } ++ class Address; ++} ++namespace lldb ++{ ++ typedef uint64_t addr_t; ++ class SBSection ++ { ++ }; ++ class SBAddress ++ { ++ void SetAddress (lldb::SBSection section, lldb::addr_t offset); ++ lldb_private::Address & ref (); ++ }; ++} ++namespace lldb_private ++{ ++ class Address ++ { ++ public: ++ const Address & SetOffset (lldb::addr_t offset) ++ { ++ bool changed = m_offset != offset; ++ } ++ std::atomic < lldb::addr_t > m_offset; ++ }; ++} ++ ++using namespace lldb; ++using namespace lldb_private; ++void ++SBAddress::SetAddress (lldb::SBSection section, lldb::addr_t offset) ++{ ++ Address & addr = ref (); ++ addr.SetOffset (offset); ++} Index: gcc/testsuite/g++.dg/cpp1y/var-templ43.C =================================================================== --- a/src/gcc/testsuite/g++.dg/cpp1y/var-templ43.C (.../tags/gcc_5_2_0_release) @@ -23624,6 +24226,61 @@ + for (I i = x; i < y; ++i) // { dg-error "no match for" } + ; +} +Index: gcc/testsuite/g++.dg/init/elide3.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/init/elide3.C (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/g++.dg/init/elide3.C (.../branches/gcc-5-branch) +@@ -0,0 +1,50 @@ ++// PR c++/67557 ++// { dg-do run } ++ ++namespace std ++{ ++ struct string ++ { ++ typedef unsigned long size_type; ++ const char* _M_p; ++ char _M_local_buf[1]; ++ ++ string(const char* s) : _M_p(_M_local_buf) ++ { ++ __builtin_printf("%p constructed\n", this); ++ } ++ ++ string(const string& s) : _M_p(_M_local_buf) ++ { ++ __builtin_printf("%p copied from %p\n", this, &s); ++ } ++ ++ ~string() ++ { ++ __builtin_printf("%p destroyed\n", this); ++ if (_M_p != _M_local_buf) ++ __builtin_abort(); ++ } ++ }; ++} ++ ++struct StartTag ++{ ++ explicit StartTag(std::string const & tag) : tag_(tag), keepempty_(false) {} ++ std::string tag_; ++ bool keepempty_; ++}; ++ ++StartTag fontToStartTag() { return StartTag(""); } ++ ++struct FontTag : public StartTag ++{ ++ FontTag() : StartTag(fontToStartTag()) {} ++}; ++ ++int main() ++{ ++ FontTag x; ++ __builtin_printf("%p x.tag_ in main()\n", &x.tag_); ++ return 0; ++} Index: gcc/testsuite/g++.dg/init/pr66857.C =================================================================== --- a/src/gcc/testsuite/g++.dg/init/pr66857.C (.../tags/gcc_5_2_0_release) @@ -23873,6 +24530,50 @@ + x += 16, y += x & 15, z += x & 15; + return x + y + z; +} +Index: gcc/cp/init.c +=================================================================== +--- a/src/gcc/cp/init.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/cp/init.c (.../branches/gcc-5-branch) +@@ -851,6 +851,18 @@ + return list; + } + ++/* Return the innermost aggregate scope for FIELD, whether that is ++ the enclosing class or an anonymous aggregate within it. */ ++ ++static tree ++innermost_aggr_scope (tree field) ++{ ++ if (ANON_AGGR_TYPE_P (TREE_TYPE (field))) ++ return TREE_TYPE (field); ++ else ++ return DECL_CONTEXT (field); ++} ++ + /* The MEM_INITS are a TREE_LIST. The TREE_PURPOSE of each list gives + a FIELD_DECL or BINFO in T that needs initialization. The + TREE_VALUE gives the initializer, or list of initializer arguments. 
+@@ -994,7 +1006,7 @@ + + /* See if this field is a member of a union, or a member of a + structure contained in a union, etc. */ +- for (ctx = DECL_CONTEXT (field); ++ for (ctx = innermost_aggr_scope (field); + !same_type_p (ctx, t); + ctx = TYPE_CONTEXT (ctx)) + if (TREE_CODE (ctx) == UNION_TYPE +@@ -1027,8 +1039,9 @@ + union { struct { int i; int j; }; }; + + initializing both `i' and `j' makes sense. */ +- ctx = common_enclosing_class (DECL_CONTEXT (field), +- DECL_CONTEXT (TREE_PURPOSE (*last_p))); ++ ctx = common_enclosing_class ++ (innermost_aggr_scope (field), ++ innermost_aggr_scope (TREE_PURPOSE (*last_p))); + + if (ctx && TREE_CODE (ctx) == UNION_TYPE) + { Index: gcc/cp/class.c =================================================================== --- a/src/gcc/cp/class.c (.../tags/gcc_5_2_0_release) @@ -24007,7 +24708,18 @@ =================================================================== --- a/src/gcc/cp/ChangeLog (.../tags/gcc_5_2_0_release) +++ b/src/gcc/cp/ChangeLog (.../branches/gcc-5-branch) -@@ -1,3 +1,128 @@ +@@ -1,3 +1,139 @@ ++2015-10-20 Jason Merrill ++ ++ PR c++/66583 ++ * init.c (innermost_aggr_scope): New. ++ (sort_mem_initializers): Use it. ++ ++ PR c++/67557 ++ * call.c (is_base_field_ref): New. ++ (unsafe_copy_elision_p): New. ++ (build_over_call): Use it. ++ +2015-09-12 Paolo Carlini + + Backport from mainline @@ -24385,6 +25097,61 @@ || (n = tree_to_shwi (num)) <= 0 || (int) n != n) { +Index: gcc/cp/call.c +=================================================================== +--- a/src/gcc/cp/call.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/cp/call.c (.../branches/gcc-5-branch) +@@ -7002,6 +7002,39 @@ + return r; + } + ++/* Return true iff T refers to a base field. */ ++ ++static bool ++is_base_field_ref (tree t) ++{ ++ STRIP_NOPS (t); ++ if (TREE_CODE (t) == ADDR_EXPR) ++ t = TREE_OPERAND (t, 0); ++ if (TREE_CODE (t) == COMPONENT_REF) ++ t = TREE_OPERAND (t, 1); ++ if (TREE_CODE (t) == FIELD_DECL) ++ return DECL_FIELD_IS_BASE (t); ++ return false; ++} ++ ++/* We can't elide a copy from a function returning by value to a base ++ subobject, as the callee might clobber tail padding. Return true iff this ++ could be that case. */ ++ ++static bool ++unsafe_copy_elision_p (tree target, tree exp) ++{ ++ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (exp)); ++ if (type == CLASSTYPE_AS_BASE (type)) ++ return false; ++ if (!is_base_field_ref (target) ++ && resolves_to_fixed_type_p (target, NULL)) ++ return false; ++ tree init = TARGET_EXPR_INITIAL (exp); ++ return (TREE_CODE (init) == AGGR_INIT_EXPR ++ && !AGGR_INIT_VIA_CTOR_P (init)); ++} ++ + /* Subroutine of the various build_*_call functions. Overload resolution + has chosen a winning candidate CAND; build up a CALL_EXPR accordingly. + ARGS is a TREE_LIST of the unconverted arguments to the call. 
FLAGS is a +@@ -7419,7 +7452,9 @@ + else if (trivial) + return force_target_expr (DECL_CONTEXT (fn), arg, complain); + } +- else if (TREE_CODE (arg) == TARGET_EXPR || trivial) ++ else if (trivial ++ || (TREE_CODE (arg) == TARGET_EXPR ++ && !unsafe_copy_elision_p (fa, arg))) + { + tree to = stabilize_reference (cp_build_indirect_ref (fa, RO_NULL, + complain)); Index: gcc/cp/cvt.c =================================================================== --- a/src/gcc/cp/cvt.c (.../tags/gcc_5_2_0_release) @@ -24460,6 +25227,29 @@ && TREE_CODE (BLOCK_SUPERCONTEXT (block)) == FUNCTION_DECL)) threshold /= 10; if (size > threshold) +Index: gcc/dojump.c +=================================================================== +--- a/src/gcc/dojump.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/dojump.c (.../branches/gcc-5-branch) +@@ -1217,12 +1217,12 @@ + If one side isn't, we want a noncanonicalized comparison. See PR + middle-end/17564. */ + if (HAVE_canonicalize_funcptr_for_compare +- && TREE_CODE (TREE_TYPE (treeop0)) == POINTER_TYPE +- && TREE_CODE (TREE_TYPE (TREE_TYPE (treeop0))) +- == FUNCTION_TYPE +- && TREE_CODE (TREE_TYPE (treeop1)) == POINTER_TYPE +- && TREE_CODE (TREE_TYPE (TREE_TYPE (treeop1))) +- == FUNCTION_TYPE) ++ && POINTER_TYPE_P (TREE_TYPE (treeop0)) ++ && POINTER_TYPE_P (TREE_TYPE (treeop1)) ++ && (TREE_CODE (TREE_TYPE (TREE_TYPE (treeop0))) == FUNCTION_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (treeop0))) == METHOD_TYPE) ++ && (TREE_CODE (TREE_TYPE (TREE_TYPE (treeop1))) == FUNCTION_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (treeop1))) == METHOD_TYPE)) + { + rtx new_op0 = gen_reg_rtx (mode); + rtx new_op1 = gen_reg_rtx (mode); Index: gcc/lto-cgraph.c =================================================================== --- a/src/gcc/lto-cgraph.c (.../tags/gcc_5_2_0_release) @@ -24882,7 +25672,26 @@ { /* Take care about non-array allocatable components here. The alloc_* routine below is motivated by the alloc_scalar_allocatable_for_ -@@ -8975,7 +9013,6 @@ +@@ -8634,6 +8672,7 @@ + tree jump_label1; + tree jump_label2; + gfc_se lse; ++ gfc_ref *ref; + + if (!expr1 || expr1->rank) + return; +@@ -8641,6 +8680,10 @@ + if (!expr2 || expr2->rank) + return; + ++ for (ref = expr1->ref; ref; ref = ref->next) ++ if (ref->type == REF_SUBSTRING) ++ return; ++ + realloc_lhs_warning (expr2->ts.type, false, &expr2->where); + + /* Since this is a scalar lhs, we can afford to do this. That is, +@@ -8975,7 +9018,6 @@ scalar_to_array = (expr2->ts.type == BT_DERIVED && expr2->ts.u.derived->attr.alloc_comp && !expr_is_variable (expr2) @@ -24890,7 +25699,7 @@ && expr1->rank && !expr2->rank); scalar_to_array |= (expr1->ts.type == BT_DERIVED && expr1->rank -@@ -8984,7 +9021,7 @@ +@@ -8984,7 +9026,7 @@ if (scalar_to_array && dealloc) { tmp = gfc_deallocate_alloc_comp_no_caf (expr2->ts.u.derived, rse.expr, 0); @@ -25256,7 +26065,31 @@ =================================================================== --- a/src/gcc/fortran/ChangeLog (.../tags/gcc_5_2_0_release) +++ b/src/gcc/fortran/ChangeLog (.../branches/gcc-5-branch) -@@ -1,3 +1,189 @@ +@@ -1,3 +1,213 @@ ++2015-10-26 Paul Thomas ++ ++ PR fortran/67177 ++ PR fortran/67977 ++ Backport from mainline r228940: ++ * primary.c (match_substring): Add an argument 'deferred' to ++ flag that a substring reference with null start and end should ++ not be optimized away for deferred length strings. ++ (match_string_constant, gfc_match_rvalue): Set the argument. ++ * trans-expr.c (alloc_scalar_allocatable_for_assignment): If ++ there is a substring reference return. 
++ * trans-intrinsic.c (conv_intrinsic_move_alloc): For deferred ++ characters, assign the 'from' string length to the 'to' string ++ length. If the 'from' expression is deferred, set its string ++ length to zero. If the 'to' expression has allocatable ++ components, deallocate them. ++ ++2015-10-22 Paul Thomas ++ ++ PR fortran/58754 ++ * trans-stmt.c (gfc_trans_allocate): Do not use the scalar ++ character assignment if the allocate expression is an array ++ descriptor. ++ +2015-10-19 Steven G. Kargl + + PR fortran/67900 @@ -25272,7 +26105,7 @@ + + PR fortran/67987 + * decl.c (char_len_param_value): Unwrap unlong line. If LEN < 0, -+ force it to zero per the Fortran 90, 95, 2003, and 2008 Standards. ++ force it to zero per the Fortran 90, 95, 2003, and 2008 Standards. + * resolve.c (gfc_resolve_substring_charlen): Unwrap unlong line. + If 'start' is larger than 'end', length of substring is negative, + so explicitly set it to zero. @@ -25446,7 +26279,7 @@ 2015-07-16 Release Manager * GCC 5.2.0 released. -@@ -94,7 +280,7 @@ +@@ -94,7 +304,7 @@ 2015-05-19 Steven G. Kargl PR fortran/66052 @@ -25455,7 +26288,7 @@ 2015-05-19 Steven G. Kargl -@@ -106,7 +292,7 @@ +@@ -106,7 +316,7 @@ PR fortran/66044 * decl.c(gfc_match_entry): Change a gfc_internal_error() into @@ -25486,6 +26319,16 @@ } } +@@ -5618,7 +5629,8 @@ + tmp = gfc_copy_class_to_class (expr3, to, + nelems, upoly_expr); + } +- else if (code->expr3->ts.type == BT_CHARACTER) ++ else if (code->expr3->ts.type == BT_CHARACTER ++ && !GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))) + { + tmp = INDIRECT_REF_P (se.expr) ? + se.expr : Index: gcc/fortran/expr.c =================================================================== --- a/src/gcc/fortran/expr.c (.../tags/gcc_5_2_0_release) @@ -26046,6 +26889,33 @@ =================================================================== --- a/src/gcc/fortran/primary.c (.../tags/gcc_5_2_0_release) +++ b/src/gcc/fortran/primary.c (.../branches/gcc-5-branch) +@@ -748,7 +748,7 @@ + /* Match a substring reference. */ + + static match +-match_substring (gfc_charlen *cl, int init, gfc_ref **result) ++match_substring (gfc_charlen *cl, int init, gfc_ref **result, bool deferred) + { + gfc_expr *start, *end; + locus old_loc; +@@ -800,7 +800,7 @@ + } + + /* Optimize away the (:) reference. */ +- if (start == NULL && end == NULL) ++ if (start == NULL && end == NULL && !deferred) + ref = NULL; + else + { +@@ -1098,7 +1098,7 @@ + if (ret != -1) + gfc_internal_error ("match_string_constant(): Delimiter not found"); + +- if (match_substring (NULL, 0, &e->ref) != MATCH_NO) ++ if (match_substring (NULL, 0, &e->ref, false) != MATCH_NO) + e->expr_type = EXPR_SUBSTRING; + + *result = e; @@ -1202,6 +1202,9 @@ return MATCH_ERROR; } @@ -26056,7 +26926,17 @@ if (!gfc_numeric_ts (&sym->value->ts)) { gfc_error ("Numeric PARAMETER required in complex constant at %C"); -@@ -2642,7 +2645,7 @@ +@@ -2078,7 +2081,8 @@ + + if (primary->ts.type == BT_CHARACTER) + { +- switch (match_substring (primary->ts.u.cl, equiv_flag, &substring)) ++ bool def = primary->ts.deferred == 1; ++ switch (match_substring (primary->ts.u.cl, equiv_flag, &substring, def)) + { + case MATCH_YES: + if (tail == NULL) +@@ -2642,7 +2646,7 @@ gfc_expr *e; gfc_symtree *symtree; @@ -26065,6 +26945,15 @@ e = gfc_get_expr (); e->symtree = symtree; +@@ -3091,7 +3095,7 @@ + that we're not sure is a variable yet. 
*/ + + if ((implicit_char || sym->ts.type == BT_CHARACTER) +- && match_substring (sym->ts.u.cl, 0, &e->ref) == MATCH_YES) ++ && match_substring (sym->ts.u.cl, 0, &e->ref, false) == MATCH_YES) + { + + e->expr_type = EXPR_VARIABLE; Index: gcc/fortran/trans-intrinsic.c =================================================================== --- a/src/gcc/fortran/trans-intrinsic.c (.../tags/gcc_5_2_0_release) @@ -26078,6 +26967,56 @@ fndecl = build_call_expr_loc (input_location, fndecl, 8, array, opr, opr_flags, image_index, stat, errmsg, strlen, errmsg_len); } +@@ -9360,6 +9360,16 @@ + } + } + ++ if (to_expr->ts.type == BT_CHARACTER && to_expr->ts.deferred) ++ { ++ gfc_add_modify_loc (input_location, &block, to_se.string_length, ++ fold_convert (TREE_TYPE (to_se.string_length), ++ from_se.string_length)); ++ if (from_expr->ts.deferred) ++ gfc_add_modify_loc (input_location, &block, from_se.string_length, ++ build_int_cst (TREE_TYPE (from_se.string_length), 0)); ++ } ++ + return gfc_finish_block (&block); + } + +@@ -9459,6 +9469,14 @@ + } + else + { ++ if (to_expr->ts.type == BT_DERIVED ++ && to_expr->ts.u.derived->attr.alloc_comp) ++ { ++ tmp = gfc_deallocate_alloc_comp (to_expr->ts.u.derived, ++ to_se.expr, to_expr->rank); ++ gfc_add_expr_to_block (&block, tmp); ++ } ++ + tmp = gfc_conv_descriptor_data_get (to_se.expr); + tmp = gfc_deallocate_with_status (tmp, NULL_TREE, NULL_TREE, NULL_TREE, + NULL_TREE, true, to_expr, false); +@@ -9473,6 +9491,17 @@ + gfc_add_modify_loc (input_location, &block, tmp, + fold_convert (TREE_TYPE (tmp), null_pointer_node)); + ++ ++ if (to_expr->ts.type == BT_CHARACTER && to_expr->ts.deferred) ++ { ++ gfc_add_modify_loc (input_location, &block, to_se.string_length, ++ fold_convert (TREE_TYPE (to_se.string_length), ++ from_se.string_length)); ++ if (from_expr->ts.deferred) ++ gfc_add_modify_loc (input_location, &block, from_se.string_length, ++ build_int_cst (TREE_TYPE (from_se.string_length), 0)); ++ } ++ + return gfc_finish_block (&block); + } + Index: gcc/fortran/simplify.c =================================================================== --- a/src/gcc/fortran/simplify.c (.../tags/gcc_5_2_0_release) @@ -27629,6 +28568,146 @@ cfun->has_local_explicit_reg_vars = false; /* Remove unmarked local and global vars from local_decls. */ +Index: gcc/tree-sra.c +=================================================================== +--- a/src/gcc/tree-sra.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/tree-sra.c (.../branches/gcc-5-branch) +@@ -4580,61 +4580,45 @@ + return NULL; + } + +-/* If the statement STMT defines an SSA_NAME of a parameter which is to be +- removed because its value is not used, replace the SSA_NAME with a one +- relating to a created VAR_DECL together all of its uses and return true. +- ADJUSTMENTS is a pointer to an adjustments vector. */ ++/* If OLD_NAME, which is being defined by statement STMT, is an SSA_NAME of a ++ parameter which is to be removed because its value is not used, create a new ++ SSA_NAME relating to a replacement VAR_DECL, replace all uses of the ++ original with it and return it. If there is no need to re-map, return NULL. ++ ADJUSTMENTS is a pointer to a vector of IPA-SRA adjustments. 
*/ + +-static bool +-replace_removed_params_ssa_names (gimple stmt, ++static tree ++replace_removed_params_ssa_names (tree old_name, gimple stmt, + ipa_parm_adjustment_vec adjustments) + { + struct ipa_parm_adjustment *adj; +- tree lhs, decl, repl, name; ++ tree decl, repl, new_name; + +- if (gimple_code (stmt) == GIMPLE_PHI) +- lhs = gimple_phi_result (stmt); +- else if (is_gimple_assign (stmt)) +- lhs = gimple_assign_lhs (stmt); +- else if (is_gimple_call (stmt)) +- lhs = gimple_call_lhs (stmt); +- else +- gcc_unreachable (); ++ if (TREE_CODE (old_name) != SSA_NAME) ++ return NULL; + +- if (TREE_CODE (lhs) != SSA_NAME) +- return false; +- +- decl = SSA_NAME_VAR (lhs); ++ decl = SSA_NAME_VAR (old_name); + if (decl == NULL_TREE + || TREE_CODE (decl) != PARM_DECL) +- return false; ++ return NULL; + + adj = get_adjustment_for_base (adjustments, decl); + if (!adj) +- return false; ++ return NULL; + + repl = get_replaced_param_substitute (adj); +- name = make_ssa_name (repl, stmt); ++ new_name = make_ssa_name (repl, stmt); + + if (dump_file) + { + fprintf (dump_file, "replacing an SSA name of a removed param "); +- print_generic_expr (dump_file, lhs, 0); ++ print_generic_expr (dump_file, old_name, 0); + fprintf (dump_file, " with "); +- print_generic_expr (dump_file, name, 0); ++ print_generic_expr (dump_file, new_name, 0); + fprintf (dump_file, "\n"); + } + +- if (is_gimple_assign (stmt)) +- gimple_assign_set_lhs (stmt, name); +- else if (is_gimple_call (stmt)) +- gimple_call_set_lhs (stmt, name); +- else +- gimple_phi_set_result (as_a (stmt), name); +- +- replace_uses_by (lhs, name); +- release_ssa_name (lhs); +- return true; ++ replace_uses_by (old_name, new_name); ++ return new_name; + } + + /* If the statement STMT contains any expressions that need to replaced with a +@@ -4713,7 +4697,16 @@ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- replace_removed_params_ssa_names (gsi_stmt (gsi), adjustments); ++ { ++ gphi *phi = as_a (gsi_stmt (gsi)); ++ tree new_lhs, old_lhs = gimple_phi_result (phi); ++ new_lhs = replace_removed_params_ssa_names (old_lhs, phi, adjustments); ++ if (new_lhs) ++ { ++ gimple_phi_set_result (phi, new_lhs); ++ release_ssa_name (old_lhs); ++ } ++ } + + gsi = gsi_start_bb (bb); + while (!gsi_end_p (gsi)) +@@ -4733,7 +4726,6 @@ + + case GIMPLE_ASSIGN: + modified |= sra_ipa_modify_assign (stmt, &gsi, adjustments); +- modified |= replace_removed_params_ssa_names (stmt, adjustments); + break; + + case GIMPLE_CALL: +@@ -4748,8 +4740,6 @@ + { + t = gimple_call_lhs_ptr (stmt); + modified |= ipa_modify_expr (t, false, adjustments); +- modified |= replace_removed_params_ssa_names (stmt, +- adjustments); + } + break; + +@@ -4773,6 +4763,20 @@ + break; + } + ++ def_operand_p defp; ++ ssa_op_iter iter; ++ FOR_EACH_SSA_DEF_OPERAND (defp, stmt, iter, SSA_OP_DEF) ++ { ++ tree old_def = DEF_FROM_PTR (defp); ++ if (tree new_def = replace_removed_params_ssa_names (old_def, stmt, ++ adjustments)) ++ { ++ SET_DEF (defp, new_def); ++ release_ssa_name (old_def); ++ modified = true; ++ } ++ } ++ + if (modified) + { + update_stmt (stmt); Index: gcc/ipa-prop.c =================================================================== --- a/src/gcc/ipa-prop.c (.../tags/gcc_5_2_0_release) @@ -547512,7 +548591,23 @@ =================================================================== --- a/src/gcc/config/s390/s390.md (.../tags/gcc_5_2_0_release) +++ b/src/gcc/config/s390/s390.md (.../branches/gcc-5-branch) -@@ -9594,7 +9594,7 @@ +@@ -6044,8 +6044,13 
@@ + (match_operand:GPR 2 "nonimmediate_operand" "") + (match_operand:GPR 3 "nonimmediate_operand" "")))] + "TARGET_Z196" +- "operands[1] = s390_emit_compare (GET_CODE (operands[1]), +- XEXP (operands[1], 0), XEXP (operands[1], 1));") ++{ ++ /* Emit the comparison insn in case we do not already have a comparison result. */ ++ if (!s390_comparison (operands[1], VOIDmode)) ++ operands[1] = s390_emit_compare (GET_CODE (operands[1]), ++ XEXP (operands[1], 0), ++ XEXP (operands[1], 1)); ++}) + + ; locr, loc, stoc, locgr, locg, stocg + (define_insn_and_split "*movcc" +@@ -9594,7 +9599,7 @@ { /* Unless this is a SEQ_CST fence, the s390 memory model is strong enough not to require barriers of any kind. */ @@ -547521,7 +548616,7 @@ { rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); MEM_VOLATILE_P (mem) = 1; -@@ -9675,7 +9675,7 @@ +@@ -9675,7 +9680,7 @@ (match_operand:SI 2 "const_int_operand")] ;; model "" { @@ -547530,7 +548625,7 @@ if (MEM_ALIGN (operands[0]) < GET_MODE_BITSIZE (GET_MODE (operands[0]))) FAIL; -@@ -9686,7 +9686,7 @@ +@@ -9686,7 +9691,7 @@ emit_insn (gen_atomic_storedi_1 (operands[0], operands[1])); else emit_move_insn (operands[0], operands[1]); @@ -548712,7 +549807,25 @@ return chkp_function_instrumented_p (current_function_decl); /* Complex values are returned in %st(0)/%st(1) pair. */ -@@ -10207,10 +10205,14 @@ +@@ -10104,11 +10102,14 @@ + frame->nregs = ix86_nsaved_regs (); + frame->nsseregs = ix86_nsaved_sseregs (); + +- /* 64-bit MS ABI seem to require stack alignment to be always 16 except for +- function prologues and leaf. */ ++ /* 64-bit MS ABI seem to require stack alignment to be always 16, ++ except for function prologues, leaf functions and when the defult ++ incoming stack boundary is overriden at command line or via ++ force_align_arg_pointer attribute. */ + if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128) + && (!crtl->is_leaf || cfun->calls_alloca != 0 +- || ix86_current_function_calls_tls_descriptor)) ++ || ix86_current_function_calls_tls_descriptor ++ || ix86_incoming_stack_boundary < 128)) + { + crtl->preferred_stack_boundary = 128; + crtl->stack_alignment_needed = 128; +@@ -10207,10 +10208,14 @@ if (frame->nsseregs) { /* The only ABI that has saved SSE registers (Win64) also has a @@ -548731,7 +549844,7 @@ offset += frame->nsseregs * 16; } frame->sse_reg_save_offset = offset; -@@ -10220,7 +10222,7 @@ +@@ -10220,7 +10225,7 @@ sure that no value happens to be the same before and after, force the alignment computation below to add a non-zero value. */ if (stack_realign_fp) @@ -548740,7 +549853,7 @@ /* Va-arg area */ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; -@@ -10433,15 +10435,24 @@ +@@ -10433,15 +10438,24 @@ { struct machine_function *m = cfun->machine; rtx reg = gen_rtx_REG (mode, regno); @@ -548768,7 +549881,7 @@ RTX_FRAME_RELATED_P (insn) = 1; base = addr; -@@ -10489,6 +10500,9 @@ +@@ -10489,6 +10503,9 @@ mem = gen_rtx_MEM (mode, addr); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg)); } @@ -548778,7 +549891,7 @@ } /* Emit code to save registers using MOV insns. -@@ -10705,6 +10719,25 @@ +@@ -10705,6 +10722,25 @@ } } @@ -548804,7 +549917,7 @@ /* Return minimum incoming stack alignment. */ static unsigned int -@@ -10719,7 +10752,6 @@ +@@ -10719,7 +10755,6 @@ if -mstackrealign is used, it isn't used for sibcall check and estimated stack alignment is 128bit. 
*/ else if (!sibcall @@ -548812,7 +549925,7 @@ && ix86_force_align_arg_pointer && crtl->stack_alignment_estimated == 128) incoming_stack_boundary = MIN_STACK_BOUNDARY; -@@ -11578,7 +11610,7 @@ +@@ -11578,7 +11613,7 @@ pointer is no longer valid. As for the value of sp_offset, see ix86_compute_frame_layout, which we need to match in order to pass verification of stack_pointer_offset at the end. */ @@ -548821,7 +549934,7 @@ m->fs.sp_valid = false; } -@@ -11991,12 +12023,27 @@ +@@ -11991,12 +12026,27 @@ { rtx reg = gen_rtx_REG (V4SFmode, regno); rtx mem; @@ -548851,7 +549964,7 @@ ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset); cfa_offset -= 16; -@@ -25140,7 +25187,8 @@ +@@ -25140,7 +25190,8 @@ dst = change_address (dst, BLKmode, destreg); set_mem_align (dst, desired_align * BITS_PER_UNIT); epilogue_size_needed = 0; @@ -548861,7 +549974,7 @@ { /* It is possible that we copied enough so the main loop will not execute. */ -@@ -25272,7 +25320,7 @@ +@@ -25272,7 +25323,7 @@ max_size -= align_bytes; } if (need_zero_guard @@ -548870,7 +549983,7 @@ && (count < (unsigned HOST_WIDE_INT) size_needed || (align_bytes == 0 && count < ((unsigned HOST_WIDE_INT) size_needed -@@ -25557,7 +25605,7 @@ +@@ -25557,7 +25608,7 @@ /* Avoid branch in fixing the byte. */ tmpreg = gen_lowpart (QImode, tmpreg); @@ -548879,7 +549992,7 @@ tmp = gen_rtx_REG (CCmode, FLAGS_REG); cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); -@@ -39522,53 +39570,41 @@ +@@ -39522,53 +39573,41 @@ return target; case IX86_BUILTIN_SBB32: @@ -548945,7 +550058,7 @@ op4 = expand_normal (arg3); if (!address_operand (op4, VOIDmode)) { -@@ -39575,8 +39611,17 @@ +@@ -39575,8 +39614,17 @@ op4 = convert_memory_address (Pmode, op4); op4 = copy_addr_to_reg (op4); } @@ -548964,7 +550077,7 @@ /* Return current CF value. */ if (target == 0) target = gen_reg_rtx (QImode); -@@ -39583,6 +39628,10 @@ +@@ -39583,6 +39631,10 @@ PUT_MODE (pat, QImode); emit_insn (gen_rtx_SET (VOIDmode, target, pat)); @@ -548975,7 +550088,7 @@ return target; case IX86_BUILTIN_READ_FLAGS: -@@ -46836,7 +46885,7 @@ +@@ -46836,7 +46888,7 @@ true }, /* force_align_arg_pointer says this function realigns the stack at entry. */ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, @@ -548984,7 +550097,7 @@ #if TARGET_DLLIMPORT_DECL_ATTRIBUTES { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, -@@ -48957,6 +49006,62 @@ +@@ -48957,6 +49009,62 @@ return true; } @@ -549047,7 +550160,7 @@ /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even and extract-odd permutations. */ -@@ -49059,6 +49164,9 @@ +@@ -49059,6 +49167,9 @@ case V32QImode: return expand_vec_perm_even_odd_pack (d); @@ -549057,7 +550170,7 @@ case V4DImode: if (!TARGET_AVX2) { -@@ -49520,6 +49628,8 @@ +@@ -49520,6 +49631,8 @@ /* Try sequences of four instructions. 
*/ @@ -549066,7 +550179,7 @@ if (expand_vec_perm_vpshufb2_vpermq (d)) return true; -@@ -50335,6 +50445,14 @@ +@@ -50335,6 +50448,14 @@ unsigned int size = INTVAL (operands[1]); unsigned int pos = INTVAL (operands[2]); @@ -549081,7 +550194,7 @@ if (GET_CODE (dst) == SUBREG) { pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; -@@ -50341,9 +50459,6 @@ +@@ -50341,9 +50462,6 @@ dst = SUBREG_REG (dst); } @@ -549091,7 +550204,7 @@ switch (GET_MODE (dst)) { case V16QImode: -@@ -50391,6 +50506,10 @@ +@@ -50391,6 +50509,10 @@ return false; } @@ -549102,7 +550215,7 @@ rtx d = dst; if (GET_MODE (dst) != dstmode) d = gen_reg_rtx (dstmode); -@@ -51516,7 +51635,7 @@ +@@ -51516,7 +51638,7 @@ static unsigned HOST_WIDE_INT ix86_memmodel_check (unsigned HOST_WIDE_INT val) { @@ -549111,7 +550224,7 @@ bool strong; if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE -@@ -51527,14 +51646,14 @@ +@@ -51527,14 +51649,14 @@ "Unknown architecture specific memory model"); return MEMMODEL_SEQ_CST; } @@ -549129,7 +550242,7 @@ { warning (OPT_Winvalid_memory_model, "HLE_RELEASE not used with RELEASE or stronger memory model"); -@@ -52307,9 +52426,6 @@ +@@ -52307,9 +52429,6 @@ #undef TARGET_CAN_INLINE_P #define TARGET_CAN_INLINE_P ix86_can_inline_p @@ -551047,7 +552160,57 @@ *cost = COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->fp[mode != SFmode].neg; -@@ -27678,8 +27685,8 @@ +@@ -27537,25 +27544,36 @@ + return 0; + } + ++/* If X is a CONST_DOUBLE with a value that is a power of 2 whose ++ log2 is in [1, 32], return that log2. Otherwise return -1. ++ This is used in the patterns for vcvt.s32.f32 floating-point to ++ fixed-point conversions. */ ++ + int +-vfp3_const_double_for_bits (rtx operand) ++vfp3_const_double_for_bits (rtx x) + { +- REAL_VALUE_TYPE r0; ++ if (!CONST_DOUBLE_P (x)) ++ return -1; + +- if (!CONST_DOUBLE_P (operand)) +- return 0; ++ REAL_VALUE_TYPE r; + +- REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); +- if (exact_real_truncate (DFmode, &r0)) +- { +- HOST_WIDE_INT value = real_to_integer (&r0); +- value = value & 0xffffffff; +- if ((value != 0) && ( (value & (value - 1)) == 0)) +- return int_log2 (value); +- } ++ REAL_VALUE_FROM_CONST_DOUBLE (r, x); ++ if (REAL_VALUE_NEGATIVE (r) ++ || REAL_VALUE_ISNAN (r) ++ || REAL_VALUE_ISINF (r) ++ || !real_isinteger (&r, SFmode)) ++ return -1; + +- return 0; ++ HOST_WIDE_INT hwint = exact_log2 (real_to_integer (&r)); ++ ++ /* The exact_log2 above will have returned -1 if this is ++ not an exact log2. */ ++ if (!IN_RANGE (hwint, 1, 32)) ++ return -1; ++ ++ return hwint; + } ++ + + /* Emit a memory barrier around an atomic sequence according to MODEL. */ + +@@ -27678,8 +27696,8 @@ promote succ to ACQ_REL so that we don't lose the acquire semantics. */ if (TARGET_HAVE_LDACQ @@ -551058,7 +552221,7 @@ mod_s = GEN_INT (MEMMODEL_ACQ_REL); switch (mode) -@@ -27752,21 +27759,26 @@ +@@ -27752,21 +27770,26 @@ oldval = operands[2]; newval = operands[3]; is_weak = (operands[4] != const0_rtx); @@ -551094,7 +552257,7 @@ /* Checks whether a barrier is needed and emits one accordingly. */ if (!(use_acquire || use_release)) arm_pre_atomic_barrier (mod_s); -@@ -27803,14 +27815,15 @@ +@@ -27803,14 +27826,15 @@ emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); } @@ -551113,7 +552276,7 @@ emit_label (label2); } -@@ -27818,22 +27831,27 @@ +@@ -27818,22 +27842,27 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, rtx value, rtx model_rtx, rtx cond) { @@ -551148,7 +552311,7 @@ /* Checks whether a barrier is needed and emits one accordingly. 
*/ if (!(use_acquire || use_release)) arm_pre_atomic_barrier (model); -@@ -27904,7 +27922,8 @@ +@@ -27904,7 +27933,8 @@ emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label)); /* Checks whether a barrier is needed and emits one accordingly. */ @@ -551158,7 +552321,7 @@ arm_post_atomic_barrier (model); } -@@ -28792,7 +28811,39 @@ +@@ -28792,7 +28822,39 @@ #undef BRANCH } @@ -551265,6 +552428,29 @@ (and (match_code "const_int") (match_test "(ival & 0xffff0000) == 0"))))) +@@ -338,7 +339,8 @@ + "@internal + In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32 with bits operation" + (and (match_code "const_double") +- (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_bits (op)"))) ++ (match_test "TARGET_32BIT && TARGET_VFP ++ && vfp3_const_double_for_bits (op) > 0"))) + + (define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" + "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") +Index: gcc/config/arm/predicates.md +=================================================================== +--- a/src/gcc/config/arm/predicates.md (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/config/arm/predicates.md (.../branches/gcc-5-branch) +@@ -668,7 +668,7 @@ + (define_predicate "const_double_vcvt_power_of_two" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP +- && vfp3_const_double_for_bits (op)"))) ++ && vfp3_const_double_for_bits (op) > 0"))) + + (define_predicate "neon_struct_operand" + (and (match_code "mem") Index: gcc/config/arm/sync.md =================================================================== --- a/src/gcc/config/arm/sync.md (.../tags/gcc_5_2_0_release) @@ -553691,6 +554877,45 @@ #endif /* __MINGW32__ */ +Index: libffi/ChangeLog +=================================================================== +--- a/src/libffi/ChangeLog (.../tags/gcc_5_2_0_release) ++++ b/src/libffi/ChangeLog (.../branches/gcc-5-branch) +@@ -1,3 +1,9 @@ ++2015-10-26 John David Anglin ++ ++ PR libffi/65441 ++ * testsuite/lib/libffi.exp: Load target-supports-dg.exp. ++ * testsuite/libffi.call/float2.c: Don't run on hppa*-*-hpux*. ++ + 2015-07-16 Release Manager + + * GCC 5.2.0 released. +Index: libffi/testsuite/libffi.call/float2.c +=================================================================== +--- a/src/libffi/testsuite/libffi.call/float2.c (.../tags/gcc_5_2_0_release) ++++ b/src/libffi/testsuite/libffi.call/float2.c (.../branches/gcc-5-branch) +@@ -3,7 +3,7 @@ + Limitations: none. + PR: none. + Originator: From the original ffitest.c */ +-/* { dg-do run } */ ++/* { dg-do run { target { ! 
hppa*-*-hpux* } } } */ + + #include "ffitest.h" + #include "float.h" +Index: libffi/testsuite/lib/libffi.exp +=================================================================== +--- a/src/libffi/testsuite/lib/libffi.exp (.../tags/gcc_5_2_0_release) ++++ b/src/libffi/testsuite/lib/libffi.exp (.../branches/gcc-5-branch) +@@ -24,6 +24,7 @@ + load_lib dg.exp + load_lib libgloss.exp + load_gcc_lib target-supports.exp ++load_gcc_lib target-supports-dg.exp + load_gcc_lib target-libpath.exp + load_gcc_lib wrapper.exp + Index: libcpp/po/nl.po =================================================================== --- a/src/libcpp/po/nl.po (.../tags/gcc_5_2_0_release) diff -u gcc-5-5.2.1/debian/rules.conf gcc-5-5.2.1/debian/rules.conf --- gcc-5-5.2.1/debian/rules.conf +++ gcc-5-5.2.1/debian/rules.conf @@ -207,12 +207,12 @@ BINUTILSBDV = 2.22 ifneq (,$(filter $(distrelease),vivid)) BINUTILSBDV = 2.25-3~ - endif - ifneq (,$(filter $(distrelease),jessie sid)) + else ifneq (,$(filter $(distrelease),jessie sid)) BINUTILSBDV = 2.25-7~ + else ifneq (,$(filter $(distrelease),xenial)) + BINUTILSBDV = 2.25.51.20151028 endif endif -BINUTILSBDV = 2.25.51.20151020-1~ ifeq ($(DEB_CROSS),yes) BINUTILS_BUILD_DEP = binutils$(TS) (>= $(BINUTILSBDV)), binutils-multiarch (>= $(BINUTILSBDV)) BINUTILSV := $(shell dpkg -l binutils$(TS) \ @@ -483,7 +483,7 @@ # try to build with itself, or with the last version ifneq (,$(filter $(distrelease), jessie)) gnat_build_dep := gnat-4.9 [$(ada_no_archs)], g++-4.9 -else ifneq (,$(filter $(distrelease), stretch sid wheezy precise trusty wily)) +else ifneq (,$(filter $(distrelease), stretch sid wheezy precise trusty wily xenial)) gnat_build_dep := gnat-5 [$(ada_no_archs)], g++-5 else ifneq (,$(filter $(distrelease), squeeze lucid)) gnat_build_dep := diff -u gcc-5-5.2.1/debian/rules.d/binary-ada.mk gcc-5-5.2.1/debian/rules.d/binary-ada.mk --- gcc-5-5.2.1/debian/rules.d/binary-ada.mk +++ gcc-5-5.2.1/debian/rules.d/binary-ada.mk @@ -393,8 +393,9 @@ mv $(d_gnat)/usr/share/ada/debian_packaging.mk \ $(d_gnat)/usr/share/ada/debian_packaging-$(GNAT_VERSION).mk endif - dh_link -p$(p_gnat) usr/bin/$(cmd_prefix)gcc$(pkg_ver) usr/bin/$(cmd_prefix)gnatgcc$(pkg_ver) - dh_link -p$(p_gnat) usr/share/man/man1/$(cmd_prefix)gnat$(pkg_ver).1.gz usr/share/man/man1/$(cmd_prefix)gnatgcc$(pkg_ver).1.gz + : # keep this one unversioned, see Debian #802838. 
+ dh_link -p$(p_gnat) usr/bin/$(cmd_prefix)gcc$(pkg_ver) usr/bin/$(cmd_prefix)gnatgcc + dh_link -p$(p_gnat) usr/share/man/man1/$(cmd_prefix)gcc$(pkg_ver).1.gz usr/share/man/man1/$(cmd_prefix)gnatgcc.1.gz debian/dh_rmemptydirs -p$(p_gnat) diff -u gcc-5-5.2.1/debian/rules.defs gcc-5-5.2.1/debian/rules.defs --- gcc-5-5.2.1/debian/rules.defs +++ gcc-5-5.2.1/debian/rules.defs @@ -352,9 +352,9 @@ # build using fsf or linaro ifeq ($(distribution),Ubuntu) ifeq (,$(findstring gnat, $(PKGSOURCE))) - #ifneq (,$(findstring $(DEB_TARGET_ARCH),arm64 armel armhf)) - # with_linaro_branch = yes - #endif + ifneq (,$(findstring $(DEB_TARGET_ARCH),arm64 armel armhf)) + with_linaro_branch = yes + endif endif endif @@ -760,7 +760,7 @@ ifeq (,$(java_cpu)) java_cpu = $(DEB_TARGET_ARCH_CPU) endif - java_priority = 10$(subst .,,$(BASE_VERSION)) + java_priority = 10$(subst .,,$(BASE_VERSION))0 with_libgcj := yes with_libgcjbc := no diff -u gcc-5-5.2.1/debian/rules.parameters gcc-5-5.2.1/debian/rules.parameters --- gcc-5-5.2.1/debian/rules.parameters +++ gcc-5-5.2.1/debian/rules.parameters @@ -2,14 +2,14 @@ GCC_VERSION := 5.2.1 NEXT_GCC_VERSION := 5.2.2 BASE_VERSION := 5 -SOURCE_VERSION := 5.2.1-22ubuntu3 -DEB_VERSION := 5.2.1-22ubuntu3 -DEB_EVERSION := 1:5.2.1-22ubuntu3 -DEB_GDC_VERSION := 5.2.1-22ubuntu3 +SOURCE_VERSION := 5.2.1-23ubuntu1~15.10 +DEB_VERSION := 5.2.1-23ubuntu1~15.10 +DEB_EVERSION := 1:5.2.1-23ubuntu1~15.10 +DEB_GDC_VERSION := 5.2.1-23ubuntu1~15.10 DEB_SOVERSION := 5 DEB_SOEVERSION := 1:5 DEB_LIBGCC_SOVERSION := -DEB_LIBGCC_VERSION := 1:5.2.1-22ubuntu3 +DEB_LIBGCC_VERSION := 1:5.2.1-23ubuntu1~15.10 DEB_STDCXX_SOVERSION := 5 DEB_GCJ_SOVERSION := 5 PKG_GCJ_EXT := 16 diff -u gcc-5-5.2.1/debian/rules.patch gcc-5-5.2.1/debian/rules.patch --- gcc-5-5.2.1/debian/rules.patch +++ gcc-5-5.2.1/debian/rules.patch @@ -87,9 +87,10 @@ go-escape-analysis6 \ gccgo-sendfile-fix \ pr66368 \ - pr67280 \ + $(if $(with_linaro_branch),,pr67280) \ pr67508 \ pr67590 \ + pr67736 \ # this is still needed on powerpc, e.g. firefox and insighttoolkit4 will ftbfs. ifneq (,$(filter $(DEB_TARGET_ARCH),powerpc)) @@ -248,7 +249,7 @@ debian_patches = endif -debian_patches += gcc-sysroot +debian_patches += $(if $(with_linaro_branch),,gcc-sysroot) debian_patches += \ sys-auxv-header \ libcilkrts-targets \ @@ -317,9 +318,6 @@ ifeq ($(DEB_TARGET_ARCH),mipsel) debian_patches += mips-fix-loongson2f-nop endif -ifneq (,$(filter $(DEB_TARGET_ARCH), mips64 mips64el)) - debian_patches += pr67736 -endif debian_patches += libgomp-kfreebsd-testsuite debian_patches += go-testsuite diff -u gcc-5-5.2.1/debian/rules2 gcc-5-5.2.1/debian/rules2 --- gcc-5-5.2.1/debian/rules2 +++ gcc-5-5.2.1/debian/rules2 @@ -399,12 +399,12 @@ ifneq (,$(findstring powerpc64le-linux,$(DEB_TARGET_GNU_TYPE))) CONFARGS += --enable-secureplt - ifneq (,$(filter $(distrelease),jessie trusty utopic vivid)) + ifneq (,$(filter $(distrelease),jessie trusty utopic vivid wily)) CONFARGS += --with-cpu=power7 --with-tune=power8 else CONFARGS += --with-cpu=power8 endif - ifneq (,$(filter $(distrelease),jessie stretch sid trusty utopic vivid wily)) + ifneq (,$(filter $(distrelease),jessie stretch sid trusty utopic vivid wily xenial)) CONFARGS += --enable-targets=powerpcle-linux endif CONFARGS += --disable-multilib