diff -u gcc-5-5.2.1/debian/changelog gcc-5-5.2.1/debian/changelog --- gcc-5-5.2.1/debian/changelog +++ gcc-5-5.2.1/debian/changelog @@ -1,9 +1,45 @@ -gcc-5 (5.2.1-22ubuntu3) wily; urgency=medium +gcc-5 (5.2.1-23ubuntu1~15.10) wily; urgency=medium - * Update to SVN 20151020 (r229103, 5.2.1) from the gcc-5-branch. + * Test build, using the Linaro support on armhf and arm64. + + -- Matthias Klose Fri, 30 Oct 2015 19:18:04 +0200 + +gcc-5 (5.2.1-23ubuntu1) xenial; urgency=medium + + * Again, configure with --enable-targets=powerpcle-linux on ppc64el. + + -- Matthias Klose Wed, 28 Oct 2015 12:18:49 +0100 + +gcc-5 (5.2.1-23) unstable; urgency=medium + + * Update to SVN 20151028 (r229478, 5.2.1) from the gcc-5-branch. + + [ Matthias Klose ] + * Update the Linaro support to the 5-2015.10 snapshot. + * gcj: On ppc64el, use the same jvm archdir name as for openjdk (ppc64le). + * gcj: Fix priority of java alternatives. Closes: #803055. + * gnat-5: Reintroduce the unversioned gnatgcc name. Closes: #802838. + + [ Aurelien Jarno ] + * Replace proposed patch for PR rtl-optimization/67736 by the one + committed on trunk. + + -- Matthias Klose Wed, 28 Oct 2015 10:36:54 +0100 + +gcc-5 (5.2.1-22ubuntu5) xenial; urgency=medium + + * Revert the fix for PR ipa/67056, causing an ICE. + + -- Matthias Klose Fri, 23 Oct 2015 19:13:51 +0200 + +gcc-5 (5.2.1-22ubuntu4) xenial; urgency=medium + + * Update to SVN 20151022 (r229176, 5.2.1) from the gcc-5-branch. + * Fix PR ipa/67056, taken from the trunk. Closes: #788299. * Target POWER8 on ppc64el. + * Again, don't strip the compiler binaries for more verbose ICEs. - -- Matthias Klose Wed, 21 Oct 2015 03:36:24 +0200 + -- Matthias Klose Thu, 22 Oct 2015 17:32:47 +0200 gcc-5 (5.2.1-22ubuntu2) wily; urgency=medium diff -u gcc-5-5.2.1/debian/control gcc-5-5.2.1/debian/control --- gcc-5-5.2.1/debian/control +++ gcc-5-5.2.1/debian/control @@ -13,7 +13,7 @@ libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], autogen, gawk, lzma, xz-utils, patchutils, zlib1g-dev, systemtap-sdt-dev [linux-any kfreebsd-any hurd-any], - binutils (>= 2.25.51.20151020-1~) | binutils-multiarch (>= 2.25.51.20151020-1~), binutils-hppa64-linux-gnu (>= 2.25.51.20151020-1~) [hppa], + binutils (>= 2.22) | binutils-multiarch (>= 2.22), binutils-hppa64-linux-gnu (>= 2.22) [hppa], gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, gdb, texinfo (>= 4.3), locales, sharutils, diff -u gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff --- gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff +++ gcc-5-5.2.1/debian/patches/gcc-linaro-doc.diff @@ -1,4 +1,4 @@ -# DP: Changes for the Linaro 5-2015.09 release (documentation). +# DP: Changes for the Linaro 5-2015.10 release (documentation). --- a/src/gcc/doc/extend.texi +++ b/src/gcc/doc/extend.texi @@ -715,7 +715,7 @@ The @code{ifunc} attribute is used to mark a function as an indirect function using the STT_GNU_IFUNC symbol type extension to the ELF standard. This allows the resolution of the symbol value to be -@@ -2883,254 +2715,780 @@ Indirect functions cannot be weak. Binutils version 2.20.1 or higher +@@ -2883,324 +2715,585 @@ Indirect functions cannot be weak. Binutils version 2.20.1 or higher and GNU C Library version 2.11.1 are required to use this feature. @item interrupt @@ -1057,7 +1057,13 @@ +@smallexample +@group +void fatal () __attribute__ ((noreturn)); -+ + +-On RX targets, you may specify one or more vector numbers as arguments +-to the attribute, as well as naming an alternate table name. 
+-Parameters are handled sequentially, so one handler can be assigned to +-multiple entries in multiple tables. One may also pass the magic +-string @code{"$default"} which causes the function to be used for any +-unfilled slots in the current table. +void +fatal (/* @r{@dots{}} */) +@{ @@ -1118,11 +1124,18 @@ +to common subexpression elimination and loop optimization just as an +arithmetic operator would be. These functions should be declared +with the attribute @code{pure}. For example, -+ -+@smallexample + +-This example shows a simple assignment of a function to one vector in +-the default table (note that preprocessor macros may be used for +-chip-specific symbolic vector names): + @smallexample +-void __attribute__ ((interrupt (5))) txd1_handler (); +int square (int) __attribute__ ((pure)); -+@end smallexample -+ + @end smallexample + +-This example assigns a function to two slots in the default table +-(using preprocessor macros defined elsewhere) and makes it the default +-for the @code{dct} table: +@noindent +says that the hypothetical function @code{square} is safe to call +fewer times than the program says. @@ -1137,15 +1150,33 @@ +The @code{returns_nonnull} attribute specifies that the function +return value should be a non-null pointer. For instance, the declaration: + -+@smallexample + @smallexample +-void __attribute__ ((interrupt (RXD1_VECT,RXD2_VECT,"dct","$default"))) +- txd1_handler (); +extern void * +mymalloc (size_t len) __attribute__((returns_nonnull)); -+@end smallexample -+ + @end smallexample + +-@item interrupt_handler +-@cindex @code{interrupt_handler} function attribute, Blackfin +-@cindex @code{interrupt_handler} function attribute, m68k +-@cindex @code{interrupt_handler} function attribute, H8/300 +-@cindex @code{interrupt_handler} function attribute, SH +-Use this attribute on the Blackfin, m68k, H8/300, H8/300H, H8S, and SH to +-indicate that the specified function is an interrupt handler. The compiler +-generates function entry and exit sequences suitable for use in an +-interrupt handler when this attribute is present. +@noindent +lets the compiler optimize callers based on the knowledge +that the return value will never be null. -+ + +-@item interrupt_thread +-@cindex @code{interrupt_thread} function attribute, fido +-Use this attribute on fido, a subarchitecture of the m68k, to indicate +-that the specified function is an interrupt handler that is designed +-to run as a thread. The compiler omits generate prologue/epilogue +-sequences and replaces the return instruction with a @code{sleep} +-instruction. This attribute is available only on fido. +@item returns_twice +@cindex @code{returns_twice} function attribute +@cindex functions that return more than once @@ -1156,7 +1187,11 @@ +function. Examples of such functions are @code{setjmp} and @code{vfork}. +The @code{longjmp}-like counterpart of such function, if any, might need +to be marked with the @code{noreturn} attribute. -+ + +-@item isr +-@cindex @code{isr} function attribute, ARM +-Use this attribute on ARM to write Interrupt Service Routines. This is an +-alias to the @code{interrupt} attribute above. +@item section ("@var{section-name}") +@cindex @code{section} function attribute +@cindex functions in arbitrary sections @@ -1165,19 +1200,46 @@ +particular functions to appear in special sections. The @code{section} +attribute specifies that a function lives in a particular section. 
+For example, the declaration: -+ + +-@item kspisusp +-@cindex @code{kspisusp} function attribute, Blackfin +-@cindex User stack pointer in interrupts on the Blackfin +-When used together with @code{interrupt_handler}, @code{exception_handler} +-or @code{nmi_handler}, code is generated to load the stack pointer +-from the USP register in the function prologue. +@smallexample +extern void foobar (void) __attribute__ ((section ("bar"))); +@end smallexample -+ + +-@item l1_text +-@cindex @code{l1_text} function attribute, Blackfin +-This attribute specifies a function to be placed into L1 Instruction +-SRAM@. The function is put into a specific section named @code{.l1.text}. +-With @option{-mfdpic}, function calls with a such function as the callee +-or caller uses inlined PLT. +@noindent +puts the function @code{foobar} in the @code{bar} section. -+ + +-@item l2 +-@cindex @code{l2} function attribute, Blackfin +-On the Blackfin, this attribute specifies a function to be placed into L2 +-SRAM. The function is put into a specific section named +-@code{.l1.text}. With @option{-mfdpic}, callers of such functions use +-an inlined PLT. +Some file formats do not support arbitrary sections so the @code{section} +attribute is not available on all platforms. +If you need to map the entire contents of a module to a particular +section, consider using the facilities of the linker instead. -+ + +-@item leaf +-@cindex @code{leaf} function attribute +-Calls to external functions with this attribute must return to the current +-compilation unit only by return or by exception handling. In particular, leaf +-functions are not allowed to call callback function passed to it from the current +-compilation unit or directly call functions exported by the unit or longjmp +-into the unit. Leaf function might still call functions from other compilation +-units and thus they are not necessarily leaf in the sense that they contain no +-function calls at all. +@item sentinel +@cindex @code{sentinel} function attribute +This function attribute ensures that a parameter in a function call is @@ -1186,17 +1248,29 @@ +last parameter of the function call. If an optional integer position +argument P is supplied to the attribute, the sentinel must be located at +position P counting backwards from the end of the argument list. -+ + +-The attribute is intended for library functions to improve dataflow analysis. +-The compiler takes the hint that any data not escaping the current compilation unit can +-not be used or modified by the leaf function. For example, the @code{sin} function +-is a leaf function, but @code{qsort} is not. +@smallexample +__attribute__ ((sentinel)) +is equivalent to +__attribute__ ((sentinel(0))) +@end smallexample -+ + +-Note that leaf functions might invoke signals and signal handlers might be +-defined in the current compilation unit and use static variables. The only +-compliant way to write such a signal handler is to declare such variables +-@code{volatile}. +The attribute is automatically set with a position of 0 for the built-in +functions @code{execl} and @code{execlp}. The built-in function +@code{execle} has the attribute set with a position of 1. -+ + +-The attribute has no effect on functions defined within the current compilation +-unit. This is to allow easy merging of multiple compilation units into one, +-for example, by using the link-time optimization. For this reason the +-attribute is not allowed on types to annotate indirect calls. 
+A valid @code{NULL} in this context is defined as zero with any pointer +type. If your system defines the @code{NULL} macro with an integer type +then you need to add an explicit cast. GCC replaces @code{stddef.h} @@ -1244,8 +1318,9 @@ +or separate the options with a comma (@samp{,}) within a single string. + +The options supported are specific to each target; refer to @ref{x86 -+Function Attributes}, @ref{PowerPC Function Attributes}, and -+@ref{Nios II Function Attributes}, for details. ++Function Attributes}, @ref{PowerPC Function Attributes}, ++@ref{ARM Function Attributes},and @ref{Nios II Function Attributes}, ++for details. + +@item unused +@cindex @code{unused} function attribute @@ -1383,13 +1458,7 @@ + return 0; +@} +@end smallexample - --On RX targets, you may specify one or more vector numbers as arguments --to the attribute, as well as naming an alternate table name. --Parameters are handled sequentially, so one handler can be assigned to --multiple entries in multiple tables. One may also pass the magic --string @code{"$default"} which causes the function to be used for any --unfilled slots in the current table. ++ +@noindent +results in warning on line 5. + @@ -1412,23 +1481,16 @@ +implicitly marks the declaration as @code{weak}. Without a +@var{target}, given as an argument to @code{weakref} or to @code{alias}, +@code{weakref} is equivalent to @code{weak}. - --This example shows a simple assignment of a function to one vector in --the default table (note that preprocessor macros may be used for --chip-specific symbolic vector names): - @smallexample --void __attribute__ ((interrupt (5))) txd1_handler (); ++ ++@smallexample +static int x() __attribute__ ((weakref ("y"))); +/* is equivalent to... */ +static int x() __attribute__ ((weak, weakref, alias ("y"))); +/* and to... */ +static int x() __attribute__ ((weakref)); +static int x() __attribute__ ((alias ("y"))); - @end smallexample - --This example assigns a function to two slots in the default table --(using preprocessor macros defined elsewhere) and makes it the default --for the @code{dct} table: ++@end smallexample ++ +A weak reference is an alias that does not by itself require a +definition to be given for the target symbol. If the target symbol is +only referenced through weak references, then it becomes a @code{weak} @@ -1465,44 +1527,104 @@ +On the ARC, you must specify the kind of interrupt to be handled +in a parameter to the interrupt attribute like this: + - @smallexample --void __attribute__ ((interrupt (RXD1_VECT,RXD2_VECT,"dct","$default"))) -- txd1_handler (); ++@smallexample +void f () __attribute__ ((interrupt ("ilink1"))); +@end smallexample + +Permissible values for this parameter are: @w{@code{ilink1}} and +@w{@code{ilink2}}. 
-+ -+@item long_call -+@itemx medium_call -+@itemx short_call -+@cindex @code{long_call} function attribute, ARC -+@cindex @code{medium_call} function attribute, ARC -+@cindex @code{short_call} function attribute, ARC -+@cindex indirect calls, ARC + + @item long_call + @itemx medium_call + @itemx short_call + @cindex @code{long_call} function attribute, ARC +-@cindex @code{long_call} function attribute, ARM +-@cindex @code{long_call} function attribute, Epiphany + @cindex @code{medium_call} function attribute, ARC + @cindex @code{short_call} function attribute, ARC +-@cindex @code{short_call} function attribute, ARM +-@cindex @code{short_call} function attribute, Epiphany + @cindex indirect calls, ARC +-@cindex indirect calls, ARM +-@cindex indirect calls, Epiphany +-These attributes specify how a particular function is called on +-ARC, ARM and Epiphany - with @code{medium_call} being specific to ARC. +These attributes specify how a particular function is called. -+These attributes override the + These attributes override the +-@option{-mlong-calls} (@pxref{ARM Options} and @ref{ARC Options}) +-and @option{-mmedium-calls} (@pxref{ARC Options}) +-command-line switches and @code{#pragma long_calls} settings. For ARM, the +-@code{long_call} attribute indicates that the function might be far +-away from the call site and require a different (more expensive) +-calling sequence. The @code{short_call} attribute always places +-the offset to the function from the call site into the @samp{BL} +-instruction directly. +@option{-mlong-calls} and @option{-mmedium-calls} (@pxref{ARC Options}) +command-line switches and @code{#pragma long_calls} settings. -+ -+For ARC, a function marked with the @code{long_call} attribute is -+always called using register-indirect jump-and-link instructions, -+thereby enabling the called function to be placed anywhere within the -+32-bit address space. A function marked with the @code{medium_call} -+attribute will always be close enough to be called with an unconditional -+branch-and-link instruction, which has a 25-bit offset from -+the call site. A function marked with the @code{short_call} -+attribute will always be close enough to be called with a conditional -+branch-and-link instruction, which has a 21-bit offset from -+the call site. + + For ARC, a function marked with the @code{long_call} attribute is + always called using register-indirect jump-and-link instructions, +@@ -3212,179 +3305,134 @@ the call site. A function marked with the @code{short_call} + attribute will always be close enough to be called with a conditional + branch-and-link instruction, which has a 21-bit offset from + the call site. +@end table -+ + +-@item longcall +-@itemx shortcall +-@cindex indirect calls, Blackfin +-@cindex indirect calls, PowerPC +-@cindex @code{longcall} function attribute, Blackfin +-@cindex @code{longcall} function attribute, PowerPC +-@cindex @code{shortcall} function attribute, Blackfin +-@cindex @code{shortcall} function attribute, PowerPC +-On Blackfin and PowerPC, the @code{longcall} attribute +-indicates that the function might be far away from the call site and +-require a different (more expensive) calling sequence. The +-@code{shortcall} attribute indicates that the function is always close +-enough for the shorter calling sequence to be used. These attributes +-override both the @option{-mlongcall} switch and, on the RS/6000 and +-PowerPC, the @code{#pragma longcall} setting. 
+- +-@xref{RS/6000 and PowerPC Options}, for more information on whether long +-calls are necessary. +- +-@item long_call +-@itemx near +-@itemx far +-@cindex indirect calls, MIPS +-@cindex @code{long_call} function attribute, MIPS +-@cindex @code{near} function attribute, MIPS +-@cindex @code{far} function attribute, MIPS +-These attributes specify how a particular function is called on MIPS@. +-The attributes override the @option{-mlong-calls} (@pxref{MIPS Options}) +-command-line switch. The @code{long_call} and @code{far} attributes are +-synonyms, and cause the compiler to always call +-the function by first loading its address into a register, and then using +-the contents of that register. The @code{near} attribute has the opposite +-effect; it specifies that non-PIC calls should be made using the more +-efficient @code{jal} instruction. +- +-@item malloc +-@cindex @code{malloc} function attribute +-This tells the compiler that a function is @code{malloc}-like, i.e., +-that the pointer @var{P} returned by the function cannot alias any +-other pointer valid when the function returns, and moreover no +-pointers to valid objects occur in any storage addressed by @var{P}. +@node ARM Function Attributes +@subsection ARM Function Attributes -+ + +-Using this attribute can improve optimization. Functions like +-@code{malloc} and @code{calloc} have this property because they return +-a pointer to uninitialized or zeroed-out storage. However, functions +-like @code{realloc} do not have this property, as they can return a +-pointer to storage containing pointers. +These function attributes are supported for ARM targets: -+ + +-@item mips16 +-@itemx nomips16 +-@cindex @code{mips16} function attribute, MIPS +-@cindex @code{nomips16} function attribute, MIPS +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, ARM @@ -1510,26 +1632,58 @@ +that the specified function is an interrupt handler. The compiler generates +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. -+ + +-On MIPS targets, you can use the @code{mips16} and @code{nomips16} +-function attributes to locally select or turn off MIPS16 code generation. +-A function with the @code{mips16} attribute is emitted as MIPS16 code, +-while MIPS16 code generation is disabled for functions with the +-@code{nomips16} attribute. These attributes override the +-@option{-mips16} and @option{-mno-mips16} options on the command line +-(@pxref{MIPS Options}). +You can specify the kind of interrupt to be handled by +adding an optional parameter to the interrupt attribute like this: -+ + +-When compiling files containing mixed MIPS16 and non-MIPS16 code, the +-preprocessor symbol @code{__mips16} reflects the setting on the command line, +-not that within individual functions. Mixed MIPS16 and non-MIPS16 code +-may interact badly with some GCC extensions such as @code{__builtin_apply} +-(@pxref{Constructing Calls}). +@smallexample +void f () __attribute__ ((interrupt ("IRQ"))); +@end smallexample -+ + +-@item micromips, MIPS +-@itemx nomicromips, MIPS +-@cindex @code{micromips} function attribute +-@cindex @code{nomicromips} function attribute +@noindent +Permissible values for this parameter are: @code{IRQ}, @code{FIQ}, +@code{SWI}, @code{ABORT} and @code{UNDEF}. -+ + +-On MIPS targets, you can use the @code{micromips} and @code{nomicromips} +-function attributes to locally select or turn off microMIPS code generation. 
+-A function with the @code{micromips} attribute is emitted as microMIPS code, +-while microMIPS code generation is disabled for functions with the +-@code{nomicromips} attribute. These attributes override the +-@option{-mmicromips} and @option{-mno-micromips} options on the command line +-(@pxref{MIPS Options}). +On ARMv7-M the interrupt type is ignored, and the attribute means the function +may be called with a word-aligned stack pointer. -+ + +-When compiling files containing mixed microMIPS and non-microMIPS code, the +-preprocessor symbol @code{__mips_micromips} reflects the setting on the +-command line, +-not that within individual functions. Mixed microMIPS and non-microMIPS code +-may interact badly with some GCC extensions such as @code{__builtin_apply} +-(@pxref{Constructing Calls}). +@item isr +@cindex @code{isr} function attribute, ARM +Use this attribute on ARM to write Interrupt Service Routines. This is an +alias to the @code{interrupt} attribute above. -+ + +-@item model (@var{model-name}) +-@cindex @code{model} function attribute, M32R/D +-@cindex function addressability on the M32R/D +@item long_call +@itemx short_call +@cindex @code{long_call} function attribute, ARM @@ -1544,7 +1698,11 @@ +calling sequence. The @code{short_call} attribute always places +the offset to the function from the call site into the @samp{BL} +instruction directly. -+ + +-On the M32R/D, use this attribute to set the addressability of an +-object, and of the code generated for a function. The identifier +-@var{model-name} is one of @code{small}, @code{medium}, or +-@code{large}, representing each of the code models. +@item naked +@cindex @code{naked} function attribute, ARM +This attribute allows the compiler to construct the @@ -1555,35 +1713,105 @@ +(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of +basic @code{asm} and C code may appear to work, they cannot be +depended upon to work reliably and are not supported. -+ + +-Small model objects live in the lower 16MB of memory (so that their +-addresses can be loaded with the @code{ld24} instruction), and are +-callable with the @code{bl} instruction. +@item pcs +@cindex @code{pcs} function attribute, ARM -+ + +-Medium model objects may live anywhere in the 32-bit address space (the +-compiler generates @code{seth/add3} instructions to load their addresses), +-and are callable with the @code{bl} instruction. +The @code{pcs} attribute can be used to control the calling convention +used for a function on ARM. The attribute takes an argument that specifies +the calling convention to use. -+ + +-Large model objects may live anywhere in the 32-bit address space (the +-compiler generates @code{seth/add3} instructions to load their addresses), +-and may not be reachable with the @code{bl} instruction (the compiler +-generates the much slower @code{seth/add3/jl} instruction sequence). +When compiling using the AAPCS ABI (or a variant of it) then valid +values for the argument are @code{"aapcs"} and @code{"aapcs-vfp"}. In +order to use a variant other than @code{"aapcs"} then the compiler must +be permitted to use the appropriate co-processor registers (i.e., the +VFP registers must be available in order to use @code{"aapcs-vfp"}). +For example, -+ + +-@item ms_abi +-@itemx sysv_abi +-@cindex @code{ms_abi} function attribute, x86 +-@cindex @code{sysv_abi} function attribute, x86 +@smallexample +/* Argument passed in r0, and result returned in r0+r1. 
*/ +double f2d (float) __attribute__((pcs("aapcs"))); - @end smallexample ++@end smallexample +-On 32-bit and 64-bit x86 targets, you can use an ABI attribute +-to indicate which calling convention should be used for a function. The +-@code{ms_abi} attribute tells the compiler to use the Microsoft ABI, +-while the @code{sysv_abi} attribute tells the compiler to use the ABI +-used on GNU/Linux and other systems. The default is to use the Microsoft ABI +-when targeting Windows. On all other systems, the default is the x86/AMD ABI. +Variadic functions always use the @code{"aapcs"} calling convention and +the compiler rejects attempts to specify an alternative. + +-Note, the @code{ms_abi} attribute for Microsoft Windows 64-bit targets currently +-requires the @option{-maccumulate-outgoing-args} option. ++@item target (@var{options}) ++@cindex @code{target} function attribute ++As discussed in @ref{Common Function Attributes}, this attribute ++allows specification of target-specific compilation options. + +-@item callee_pop_aggregate_return (@var{number}) +-@cindex @code{callee_pop_aggregate_return} function attribute, x86 ++On ARM, the following options are allowed: + +-On x86-32 targets, you can use this attribute to control how +-aggregates are returned in memory. If the caller is responsible for +-popping the hidden pointer together with the rest of the arguments, specify +-@var{number} equal to zero. If callee is responsible for popping the +-hidden pointer, specify @var{number} equal to one. ++@table @samp ++@item thumb ++@cindex @code{target("thumb")} function attribute, ARM ++Force code generation in the Thumb (T16/T32) ISA, depending on the ++architecture level. ++ ++@item arm ++@cindex @code{target("arm")} function attribute, ARM ++Force code generation in the ARM (A32) ISA. +@end table -+ + +-The default x86-32 ABI assumes that the callee pops the +-stack for hidden pointer. However, on x86-32 Microsoft Windows targets, +-the compiler assumes that the +-caller pops the stack for hidden pointer. ++Functions from different modes can be inlined in the caller's mode. + +-@item ms_hook_prologue +-@cindex @code{ms_hook_prologue} function attribute, x86 ++@end table + +-On 32-bit and 64-bit x86 targets, you can use +-this function attribute to make GCC generate the ``hot-patching'' function +-prologue used in Win32 API functions in Microsoft Windows XP Service Pack 2 +-and newer. +@node AVR Function Attributes +@subsection AVR Function Attributes -+ + +-@item hotpatch (@var{halfwords-before-function-label},@var{halfwords-after-function-label}) +-@cindex @code{hotpatch} function attribute, S/390 +These function attributes are supported by the AVR back end: -+ + +-On S/390 System z targets, you can use this function attribute to +-make GCC generate a ``hot-patching'' function prologue. If the +-@option{-mhotpatch=} command-line option is used at the same time, +-the @code{hotpatch} attribute takes precedence. The first of the +-two arguments specifies the number of halfwords to be added before +-the function label. A second argument can be used to specify the +-number of halfwords to be added after the function label. For +-both arguments the maximum allowed value is 1000000. +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, AVR @@ -1591,7 +1819,8 @@ +that the specified function is an interrupt handler. The compiler generates +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. 
-+ + +-If both arguments are zero, hotpatching is disabled. +On the AVR, the hardware globally disables interrupts when an +interrupt is executed. The first instruction of an interrupt handler +declared with this attribute is a @code{SEI} instruction to @@ -1599,18 +1828,33 @@ +that does not insert a @code{SEI} instruction. If both @code{signal} and +@code{interrupt} are specified for the same function, @code{signal} +is silently ignored. -+ -+@item naked -+@cindex @code{naked} function attribute, AVR + + @item naked +-@cindex @code{naked} function attribute, ARM + @cindex @code{naked} function attribute, AVR +-@cindex @code{naked} function attribute, MCORE +-@cindex @code{naked} function attribute, MSP430 +-@cindex @code{naked} function attribute, NDS32 +-@cindex @code{naked} function attribute, RL78 +-@cindex @code{naked} function attribute, RX +-@cindex @code{naked} function attribute, SPU +-@cindex function without prologue/epilogue code +-This attribute is available on the ARM, AVR, MCORE, MSP430, NDS32, +-RL78, RX and SPU ports. It allows the compiler to construct the +This attribute allows the compiler to construct the -+requisite function declaration, while allowing the body of the -+function to be assembly code. The specified function will not have -+prologue/epilogue sequences generated by the compiler. Only basic -+@code{asm} statements can safely be included in naked functions -+(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of -+basic @code{asm} and C code may appear to work, they cannot be -+depended upon to work reliably and are not supported. -+ + requisite function declaration, while allowing the body of the + function to be assembly code. The specified function will not have + prologue/epilogue sequences generated by the compiler. Only basic +@@ -3393,12 +3441,108 @@ prologue/epilogue sequences generated by the compiler. Only basic + basic @code{asm} and C code may appear to work, they cannot be + depended upon to work reliably and are not supported. + +-@item near +-@cindex @code{near} function attribute, MeP +-@cindex functions that do not handle memory bank switching on 68HC11/68HC12 +-On MeP targets this attribute causes the compiler to assume the called +-function is close enough to use the normal calling convention, +-overriding the @option{-mtf} command-line option. +@item OS_main +@itemx OS_task +@cindex @code{OS_main} function attribute, AVR @@ -1674,313 +1918,65 @@ +exit sequences suitable for use in an exception handler when this +attribute is present. + - @item interrupt_handler - @cindex @code{interrupt_handler} function attribute, Blackfin --@cindex @code{interrupt_handler} function attribute, m68k --@cindex @code{interrupt_handler} function attribute, H8/300 --@cindex @code{interrupt_handler} function attribute, SH --Use this attribute on the Blackfin, m68k, H8/300, H8/300H, H8S, and SH to ++@item interrupt_handler ++@cindex @code{interrupt_handler} function attribute, Blackfin +Use this attribute to - indicate that the specified function is an interrupt handler. The compiler - generates function entry and exit sequences suitable for use in an - interrupt handler when this attribute is present. - --@item interrupt_thread --@cindex @code{interrupt_thread} function attribute, fido --Use this attribute on fido, a subarchitecture of the m68k, to indicate --that the specified function is an interrupt handler that is designed --to run as a thread. 
The compiler omits generate prologue/epilogue --sequences and replaces the return instruction with a @code{sleep} --instruction. This attribute is available only on fido. -- --@item isr --@cindex @code{isr} function attribute, ARM --Use this attribute on ARM to write Interrupt Service Routines. This is an --alias to the @code{interrupt} attribute above. -- - @item kspisusp - @cindex @code{kspisusp} function attribute, Blackfin - @cindex User stack pointer in interrupts on the Blackfin -@@ -3147,258 +3505,22 @@ or caller uses inlined PLT. - - @item l2 - @cindex @code{l2} function attribute, Blackfin --On the Blackfin, this attribute specifies a function to be placed into L2 ++indicate that the specified function is an interrupt handler. The compiler ++generates function entry and exit sequences suitable for use in an ++interrupt handler when this attribute is present. ++ ++@item kspisusp ++@cindex @code{kspisusp} function attribute, Blackfin ++@cindex User stack pointer in interrupts on the Blackfin ++When used together with @code{interrupt_handler}, @code{exception_handler} ++or @code{nmi_handler}, code is generated to load the stack pointer ++from the USP register in the function prologue. ++ ++@item l1_text ++@cindex @code{l1_text} function attribute, Blackfin ++This attribute specifies a function to be placed into L1 Instruction ++SRAM@. The function is put into a specific section named @code{.l1.text}. ++With @option{-mfdpic}, function calls with a such function as the callee ++or caller uses inlined PLT. ++ ++@item l2 ++@cindex @code{l2} function attribute, Blackfin +This attribute specifies a function to be placed into L2 - SRAM. The function is put into a specific section named --@code{.l1.text}. With @option{-mfdpic}, callers of such functions use ++SRAM. The function is put into a specific section named +@code{.l2.text}. With @option{-mfdpic}, callers of such functions use - an inlined PLT. - --@item leaf --@cindex @code{leaf} function attribute --Calls to external functions with this attribute must return to the current --compilation unit only by return or by exception handling. In particular, leaf --functions are not allowed to call callback function passed to it from the current --compilation unit or directly call functions exported by the unit or longjmp --into the unit. Leaf function might still call functions from other compilation --units and thus they are not necessarily leaf in the sense that they contain no --function calls at all. -- --The attribute is intended for library functions to improve dataflow analysis. --The compiler takes the hint that any data not escaping the current compilation unit can --not be used or modified by the leaf function. For example, the @code{sin} function --is a leaf function, but @code{qsort} is not. -- --Note that leaf functions might invoke signals and signal handlers might be --defined in the current compilation unit and use static variables. The only --compliant way to write such a signal handler is to declare such variables --@code{volatile}. -- --The attribute has no effect on functions defined within the current compilation --unit. This is to allow easy merging of multiple compilation units into one, --for example, by using the link-time optimization. For this reason the --attribute is not allowed on types to annotate indirect calls. 
-- --@item long_call --@itemx medium_call --@itemx short_call --@cindex @code{long_call} function attribute, ARC --@cindex @code{long_call} function attribute, ARM --@cindex @code{long_call} function attribute, Epiphany --@cindex @code{medium_call} function attribute, ARC --@cindex @code{short_call} function attribute, ARC --@cindex @code{short_call} function attribute, ARM --@cindex @code{short_call} function attribute, Epiphany --@cindex indirect calls, ARC --@cindex indirect calls, ARM --@cindex indirect calls, Epiphany --These attributes specify how a particular function is called on --ARC, ARM and Epiphany - with @code{medium_call} being specific to ARC. --These attributes override the --@option{-mlong-calls} (@pxref{ARM Options} and @ref{ARC Options}) --and @option{-mmedium-calls} (@pxref{ARC Options}) --command-line switches and @code{#pragma long_calls} settings. For ARM, the --@code{long_call} attribute indicates that the function might be far --away from the call site and require a different (more expensive) --calling sequence. The @code{short_call} attribute always places --the offset to the function from the call site into the @samp{BL} --instruction directly. -- --For ARC, a function marked with the @code{long_call} attribute is --always called using register-indirect jump-and-link instructions, --thereby enabling the called function to be placed anywhere within the --32-bit address space. A function marked with the @code{medium_call} --attribute will always be close enough to be called with an unconditional --branch-and-link instruction, which has a 25-bit offset from --the call site. A function marked with the @code{short_call} --attribute will always be close enough to be called with a conditional --branch-and-link instruction, which has a 21-bit offset from --the call site. -- - @item longcall - @itemx shortcall - @cindex indirect calls, Blackfin --@cindex indirect calls, PowerPC - @cindex @code{longcall} function attribute, Blackfin --@cindex @code{longcall} function attribute, PowerPC - @cindex @code{shortcall} function attribute, Blackfin --@cindex @code{shortcall} function attribute, PowerPC --On Blackfin and PowerPC, the @code{longcall} attribute ++an inlined PLT. ++ ++@item longcall ++@itemx shortcall ++@cindex indirect calls, Blackfin ++@cindex @code{longcall} function attribute, Blackfin ++@cindex @code{shortcall} function attribute, Blackfin +The @code{longcall} attribute - indicates that the function might be far away from the call site and - require a different (more expensive) calling sequence. The - @code{shortcall} attribute indicates that the function is always close - enough for the shorter calling sequence to be used. These attributes --override both the @option{-mlongcall} switch and, on the RS/6000 and --PowerPC, the @code{#pragma longcall} setting. -- --@xref{RS/6000 and PowerPC Options}, for more information on whether long --calls are necessary. -- --@item long_call --@itemx near --@itemx far --@cindex indirect calls, MIPS --@cindex @code{long_call} function attribute, MIPS --@cindex @code{near} function attribute, MIPS --@cindex @code{far} function attribute, MIPS --These attributes specify how a particular function is called on MIPS@. --The attributes override the @option{-mlong-calls} (@pxref{MIPS Options}) --command-line switch. The @code{long_call} and @code{far} attributes are --synonyms, and cause the compiler to always call --the function by first loading its address into a register, and then using --the contents of that register. 
The @code{near} attribute has the opposite --effect; it specifies that non-PIC calls should be made using the more --efficient @code{jal} instruction. -- --@item malloc --@cindex @code{malloc} function attribute --This tells the compiler that a function is @code{malloc}-like, i.e., --that the pointer @var{P} returned by the function cannot alias any --other pointer valid when the function returns, and moreover no --pointers to valid objects occur in any storage addressed by @var{P}. -- --Using this attribute can improve optimization. Functions like --@code{malloc} and @code{calloc} have this property because they return --a pointer to uninitialized or zeroed-out storage. However, functions --like @code{realloc} do not have this property, as they can return a --pointer to storage containing pointers. -- --@item mips16 --@itemx nomips16 --@cindex @code{mips16} function attribute, MIPS --@cindex @code{nomips16} function attribute, MIPS -- --On MIPS targets, you can use the @code{mips16} and @code{nomips16} --function attributes to locally select or turn off MIPS16 code generation. --A function with the @code{mips16} attribute is emitted as MIPS16 code, --while MIPS16 code generation is disabled for functions with the --@code{nomips16} attribute. These attributes override the --@option{-mips16} and @option{-mno-mips16} options on the command line --(@pxref{MIPS Options}). -- --When compiling files containing mixed MIPS16 and non-MIPS16 code, the --preprocessor symbol @code{__mips16} reflects the setting on the command line, --not that within individual functions. Mixed MIPS16 and non-MIPS16 code --may interact badly with some GCC extensions such as @code{__builtin_apply} --(@pxref{Constructing Calls}). -- --@item micromips, MIPS --@itemx nomicromips, MIPS --@cindex @code{micromips} function attribute --@cindex @code{nomicromips} function attribute ++indicates that the function might be far away from the call site and ++require a different (more expensive) calling sequence. The ++@code{shortcall} attribute indicates that the function is always close ++enough for the shorter calling sequence to be used. These attributes ++override the @option{-mlongcall} switch. + + @item nesting + @cindex @code{nesting} function attribute, Blackfin +@@ -3415,410 +3559,349 @@ is an NMI handler. The compiler generates function entry and + exit sequences suitable for use in an NMI handler when this + attribute is present. + +-@item nocompression +-@cindex @code{nocompression} function attribute, MIPS +-On MIPS targets, you can use the @code{nocompression} function attribute +-to locally turn off MIPS16 and microMIPS code generation. This attribute +-overrides the @option{-mips16} and @option{-mmicromips} options on the +-command line (@pxref{MIPS Options}). - --On MIPS targets, you can use the @code{micromips} and @code{nomicromips} --function attributes to locally select or turn off microMIPS code generation. --A function with the @code{micromips} attribute is emitted as microMIPS code, --while microMIPS code generation is disabled for functions with the --@code{nomicromips} attribute. These attributes override the --@option{-mmicromips} and @option{-mno-micromips} options on the command line --(@pxref{MIPS Options}). -- --When compiling files containing mixed microMIPS and non-microMIPS code, the --preprocessor symbol @code{__mips_micromips} reflects the setting on the --command line, --not that within individual functions. 
Mixed microMIPS and non-microMIPS code --may interact badly with some GCC extensions such as @code{__builtin_apply} --(@pxref{Constructing Calls}). -- --@item model (@var{model-name}) --@cindex @code{model} function attribute, M32R/D --@cindex function addressability on the M32R/D -- --On the M32R/D, use this attribute to set the addressability of an --object, and of the code generated for a function. The identifier --@var{model-name} is one of @code{small}, @code{medium}, or --@code{large}, representing each of the code models. -- --Small model objects live in the lower 16MB of memory (so that their --addresses can be loaded with the @code{ld24} instruction), and are --callable with the @code{bl} instruction. -- --Medium model objects may live anywhere in the 32-bit address space (the --compiler generates @code{seth/add3} instructions to load their addresses), --and are callable with the @code{bl} instruction. -- --Large model objects may live anywhere in the 32-bit address space (the --compiler generates @code{seth/add3} instructions to load their addresses), --and may not be reachable with the @code{bl} instruction (the compiler --generates the much slower @code{seth/add3/jl} instruction sequence). -- --@item ms_abi --@itemx sysv_abi --@cindex @code{ms_abi} function attribute, x86 --@cindex @code{sysv_abi} function attribute, x86 -- --On 32-bit and 64-bit x86 targets, you can use an ABI attribute --to indicate which calling convention should be used for a function. The --@code{ms_abi} attribute tells the compiler to use the Microsoft ABI, --while the @code{sysv_abi} attribute tells the compiler to use the ABI --used on GNU/Linux and other systems. The default is to use the Microsoft ABI --when targeting Windows. On all other systems, the default is the x86/AMD ABI. -- --Note, the @code{ms_abi} attribute for Microsoft Windows 64-bit targets currently --requires the @option{-maccumulate-outgoing-args} option. -- --@item callee_pop_aggregate_return (@var{number}) --@cindex @code{callee_pop_aggregate_return} function attribute, x86 -- --On x86-32 targets, you can use this attribute to control how --aggregates are returned in memory. If the caller is responsible for --popping the hidden pointer together with the rest of the arguments, specify --@var{number} equal to zero. If callee is responsible for popping the --hidden pointer, specify @var{number} equal to one. -- --The default x86-32 ABI assumes that the callee pops the --stack for hidden pointer. However, on x86-32 Microsoft Windows targets, --the compiler assumes that the --caller pops the stack for hidden pointer. -- --@item ms_hook_prologue --@cindex @code{ms_hook_prologue} function attribute, x86 -- --On 32-bit and 64-bit x86 targets, you can use --this function attribute to make GCC generate the ``hot-patching'' function --prologue used in Win32 API functions in Microsoft Windows XP Service Pack 2 --and newer. -- --@item hotpatch (@var{halfwords-before-function-label},@var{halfwords-after-function-label}) --@cindex @code{hotpatch} function attribute, S/390 -- --On S/390 System z targets, you can use this function attribute to --make GCC generate a ``hot-patching'' function prologue. If the --@option{-mhotpatch=} command-line option is used at the same time, --the @code{hotpatch} attribute takes precedence. The first of the --two arguments specifies the number of halfwords to be added before --the function label. A second argument can be used to specify the --number of halfwords to be added after the function label. 
For --both arguments the maximum allowed value is 1000000. -- --If both arguments are zero, hotpatching is disabled. -- --@item naked --@cindex @code{naked} function attribute, ARM --@cindex @code{naked} function attribute, AVR --@cindex @code{naked} function attribute, MCORE --@cindex @code{naked} function attribute, MSP430 --@cindex @code{naked} function attribute, NDS32 --@cindex @code{naked} function attribute, RL78 --@cindex @code{naked} function attribute, RX --@cindex @code{naked} function attribute, SPU --@cindex function without prologue/epilogue code --This attribute is available on the ARM, AVR, MCORE, MSP430, NDS32, --RL78, RX and SPU ports. It allows the compiler to construct the --requisite function declaration, while allowing the body of the --function to be assembly code. The specified function will not have --prologue/epilogue sequences generated by the compiler. Only basic --@code{asm} statements can safely be included in naked functions --(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of --basic @code{asm} and C code may appear to work, they cannot be --depended upon to work reliably and are not supported. -- --@item near --@cindex @code{near} function attribute, MeP --@cindex functions that do not handle memory bank switching on 68HC11/68HC12 --On MeP targets this attribute causes the compiler to assume the called --function is close enough to use the normal calling convention, --overriding the @option{-mtf} command-line option. -+override the @option{-mlongcall} switch. - - @item nesting - @cindex @code{nesting} function attribute, Blackfin -@@ -3415,410 +3537,349 @@ is an NMI handler. The compiler generates function entry and - exit sequences suitable for use in an NMI handler when this - attribute is present. - --@item nocompression --@cindex @code{nocompression} function attribute, MIPS --On MIPS targets, you can use the @code{nocompression} function attribute --to locally turn off MIPS16 and microMIPS code generation. This attribute --overrides the @option{-mips16} and @option{-mmicromips} options on the --command line (@pxref{MIPS Options}). -- --@item no_instrument_function --@cindex @code{no_instrument_function} function attribute --@opindex finstrument-functions --If @option{-finstrument-functions} is given, profiling function calls are --generated at entry and exit of most user-compiled functions. --Functions with this attribute are not so instrumented. +-@item no_instrument_function +-@cindex @code{no_instrument_function} function attribute +-@opindex finstrument-functions +-If @option{-finstrument-functions} is given, profiling function calls are +-generated at entry and exit of most user-compiled functions. +-Functions with this attribute are not so instrumented. - -@item no_split_stack -@cindex @code{no_split_stack} function attribute @@ -2582,46 +2578,37 @@ +On MeP targets, this attribute causes the compiler to emit +instructions to disable interrupts for the duration of the given +function. - --@item resbank --@cindex @code{resbank} function attribute, SH --On the SH2A target, this attribute enables the high-speed register --saving and restoration using a register bank for @code{interrupt_handler} --routines. Saving to the bank is performed automatically after the CPU --accepts an interrupt that uses a register bank. ++ +@item interrupt +@cindex @code{interrupt} function attribute, MeP +Use this attribute to indicate +that the specified function is an interrupt handler. 
The compiler generates +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. - --The nineteen 32-bit registers comprising general register R0 to R14, --control register GBR, and system registers MACH, MACL, and PR and the --vector table address offset are saved into a register bank. Register --banks are stacked in first-in last-out (FILO) sequence. Restoration --from the bank is executed by issuing a RESBANK instruction. ++ +@item near +@cindex @code{near} function attribute, MeP +This attribute causes the compiler to assume the called +function is close enough to use the normal calling convention, +overriding the @option{-mtf} command-line option. --@item returns_twice --@cindex @code{returns_twice} function attribute --The @code{returns_twice} attribute tells the compiler that a function may --return more than one time. The compiler ensures that all registers --are dead before calling such a function and emits a warning about --the variables that may be clobbered after the second return from the --function. Examples of such functions are @code{setjmp} and @code{vfork}. --The @code{longjmp}-like counterpart of such function, if any, might need --to be marked with the @code{noreturn} attribute. +-@item resbank +-@cindex @code{resbank} function attribute, SH +-On the SH2A target, this attribute enables the high-speed register +-saving and restoration using a register bank for @code{interrupt_handler} +-routines. Saving to the bank is performed automatically after the CPU +-accepts an interrupt that uses a register bank. +@item far +@cindex @code{far} function attribute, MeP +On MeP targets this causes the compiler to use a calling convention +that assumes the called function is too far away for the built-in +addressing modes. -+ + +-The nineteen 32-bit registers comprising general register R0 to R14, +-control register GBR, and system registers MACH, MACL, and PR and the +-vector table address offset are saved into a register bank. Register +-banks are stacked in first-in last-out (FILO) sequence. Restoration +-from the bank is executed by issuing a RESBANK instruction. +@item vliw +@cindex @code{vliw} function attribute, MeP +The @code{vliw} attribute tells the compiler to emit @@ -2630,6 +2617,18 @@ +and enabled through command-line options. +@end table +-@item returns_twice +-@cindex @code{returns_twice} function attribute +-The @code{returns_twice} attribute tells the compiler that a function may +-return more than one time. The compiler ensures that all registers +-are dead before calling such a function and emits a warning about +-the variables that may be clobbered after the second return from the +-function. Examples of such functions are @code{setjmp} and @code{vfork}. +-The @code{longjmp}-like counterpart of such function, if any, might need +-to be marked with the @code{noreturn} attribute. ++@node MicroBlaze Function Attributes ++@subsection MicroBlaze Function Attributes + -@item saveall -@cindex @code{saveall} function attribute, Blackfin -@cindex @code{saveall} function attribute, H8/300 @@ -2637,9 +2636,6 @@ -Use this attribute on the Blackfin, H8/300, H8/300H, and H8S to indicate that -all registers except the stack pointer should be saved in the prologue -regardless of whether they are used or not. -+@node MicroBlaze Function Attributes -+@subsection MicroBlaze Function Attributes -+ +These function attributes are supported on MicroBlaze targets: +@table @code @@ -2650,7 +2646,7 @@ an interrupt handler. 
All volatile registers (in addition to non-volatile registers) are saved in the function prologue. If the function is a leaf function, only volatiles used by the function are saved. A normal function -@@ -3827,7 +3888,7 @@ return is generated instead of a return from interrupt. +@@ -3827,7 +3910,7 @@ return is generated instead of a return from interrupt. @item break_handler @cindex @code{break_handler} function attribute, MicroBlaze @cindex break handler functions @@ -2659,7 +2655,7 @@ the specified function is a break handler. The compiler generates function entry and exit sequences suitable for use in an break handler when this attribute is present. The return from @code{break_handler} is done through -@@ -3836,290 +3897,404 @@ the @code{rtbd} instead of @code{rtsd}. +@@ -3836,290 +3919,404 @@ the @code{rtbd} instead of @code{rtsd}. @smallexample void f () __attribute__ ((break_handler)); @end smallexample @@ -2895,11 +2891,9 @@ +Return using the @code{deret} instruction. Interrupt handlers that don't +have this attribute return using @code{eret} instead. +@end table - ++ +You can use any combination of these attributes, as shown below: - @smallexample --int core2_func (void) __attribute__ ((__target__ ("arch=core2"))); --int sse3_func (void) __attribute__ ((__target__ ("sse3"))); ++@smallexample +void __attribute__ ((interrupt)) v0 (); +void __attribute__ ((interrupt, use_shadow_register_set)) v1 (); +void __attribute__ ((interrupt, keep_interrupts_masked)) v2 (); @@ -2913,11 +2907,8 @@ +void __attribute__ ((interrupt, use_shadow_register_set, + keep_interrupts_masked, + use_debug_exception_return)) v7 (); - @end smallexample - --You can either use multiple --strings to specify multiple options, or separate the options --with a comma (@samp{,}). ++@end smallexample ++ +@item long_call +@itemx near +@itemx far @@ -2934,15 +2925,18 @@ +effect; it specifies that non-PIC calls should be made using the more +efficient @code{jal} instruction. --The @code{target} attribute is presently implemented for --x86, PowerPC, and Nios II targets only. --The options supported are specific to each target. +-@smallexample +-int core2_func (void) __attribute__ ((__target__ ("arch=core2"))); +-int sse3_func (void) __attribute__ ((__target__ ("sse3"))); +-@end smallexample +@item mips16 +@itemx nomips16 +@cindex @code{mips16} function attribute, MIPS +@cindex @code{nomips16} function attribute, MIPS --On the x86, the following options are allowed: +-You can either use multiple +-strings to specify multiple options, or separate the options +-with a comma (@samp{,}). +On MIPS targets, you can use the @code{mips16} and @code{nomips16} +function attributes to locally select or turn off MIPS16 code generation. +A function with the @code{mips16} attribute is emitted as MIPS16 code, @@ -2950,13 +2944,17 @@ +@code{nomips16} attribute. These attributes override the +@option{-mips16} and @option{-mno-mips16} options on the command line +(@pxref{MIPS Options}). -+ + +-The @code{target} attribute is presently implemented for +-x86, PowerPC, and Nios II targets only. +-The options supported are specific to each target. +When compiling files containing mixed MIPS16 and non-MIPS16 code, the +preprocessor symbol @code{__mips16} reflects the setting on the command line, +not that within individual functions. Mixed MIPS16 and non-MIPS16 code +may interact badly with some GCC extensions such as @code{__builtin_apply} +(@pxref{Constructing Calls}). 
-+ + +-On the x86, the following options are allowed: +@item micromips, MIPS +@itemx nomicromips, MIPS +@cindex @code{micromips} function attribute @@ -3293,16 +3291,12 @@ On the PowerPC, the following options are allowed: -@@ -4293,35 +4468,193 @@ compilation tunes for the @var{CPU} architecture, and not the +@@ -4293,666 +4490,535 @@ compilation tunes for the @var{CPU} architecture, and not the default tuning specified on the command line. @end table -When compiling for Nios II, the following options are allowed: -+On the PowerPC, the inliner does not inline a -+function that has different target options than the caller, unless the -+callee has a subset of the target options of the caller. -+@end table - +- -@table @samp -@item custom-@var{insn}=@var{N} -@itemx no-custom-@var{insn} @@ -3316,66 +3310,229 @@ -@option{-mcustom-@var{insn}=@var{N}} and @option{-mno-custom-@var{insn}} -command-line options, and support the same set of @var{insn} keywords. -@xref{Nios II Options}, for more information. -+@node RL78 Function Attributes -+@subsection RL78 Function Attributes - +- -@item custom-fpu-cfg=@var{name} -@cindex @code{target("custom-fpu-cfg=@var{name}")} function attribute, Nios II -This attribute corresponds to the @option{-mcustom-fpu-cfg=@var{name}} -command-line option, to select a predefined set of custom instructions -named @var{name}. -@xref{Nios II Options}, for more information. -+These function attributes are supported by the RL78 back end: -+ -+@table @code -+@item interrupt -+@itemx brk_interrupt -+@cindex @code{interrupt} function attribute, RL78 -+@cindex @code{brk_interrupt} function attribute, RL78 -+These attributes indicate -+that the specified function is an interrupt handler. The compiler generates -+function entry and exit sequences suitable for use in an interrupt handler -+when this attribute is present. -+ -+Use @code{brk_interrupt} instead of @code{interrupt} for -+handlers intended to be used with the @code{BRK} opcode (i.e.@: those -+that must end with @code{RETB} instead of @code{RETI}). -+ -+@item naked -+@cindex @code{naked} function attribute, RL78 -+This attribute allows the compiler to construct the -+requisite function declaration, while allowing the body of the -+function to be assembly code. The specified function will not have -+prologue/epilogue sequences generated by the compiler. Only basic -+@code{asm} statements can safely be included in naked functions -+(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of -+basic @code{asm} and C code may appear to work, they cannot be -+depended upon to work reliably and are not supported. - @end table - +-@end table +- -On the x86 and PowerPC back ends, the inliner does not inline a --function that has different target options than the caller, unless the ++On the PowerPC, the inliner does not inline a + function that has different target options than the caller, unless the -callee has a subset of the target options of the caller. For example -a function declared with @code{target("sse3")} can inline a function -with @code{target("sse2")}, since @code{-msse3} implies @code{-msse2}. -+@node RX Function Attributes -+@subsection RX Function Attributes -+ -+These function attributes are supported by the RX back end: -+ -+@table @code -+@item fast_interrupt -+@cindex @code{fast_interrupt} function attribute, RX -+Use this attribute on the RX port to indicate that the specified -+function is a fast interrupt handler. 
This is just like the -+@code{interrupt} attribute, except that @code{freit} is used to return -+instead of @code{reit}. -+ -+@item interrupt -+@cindex @code{interrupt} function attribute, RX -+Use this attribute to indicate -+that the specified function is an interrupt handler. The compiler generates -+function entry and exit sequences suitable for use in an interrupt handler +- +-@item trap_exit +-@cindex @code{trap_exit} function attribute, SH +-Use this attribute on the SH for an @code{interrupt_handler} to return using +-@code{trapa} instead of @code{rte}. This attribute expects an integer +-argument specifying the trap number to be used. +- +-@item trapa_handler +-@cindex @code{trapa_handler} function attribute, SH +-On SH targets this function attribute is similar to @code{interrupt_handler} +-but it does not save and restore all registers. +- +-@item unused +-@cindex @code{unused} function attribute +-This attribute, attached to a function, means that the function is meant +-to be possibly unused. GCC does not produce a warning for this +-function. +- +-@item used +-@cindex @code{used} function attribute +-This attribute, attached to a function, means that code must be emitted +-for the function even if it appears that the function is not referenced. +-This is useful, for example, when the function is referenced only in +-inline assembly. +- +-When applied to a member function of a C++ class template, the +-attribute also means that the function is instantiated if the +-class itself is instantiated. +- +-@item vector +-@cindex @code{vector} function attribute, RX +-This RX attribute is similar to the @code{interrupt} attribute, including its +-parameters, but does not make the function an interrupt-handler type +-function (i.e. it retains the normal C function calling ABI). See the +-@code{interrupt} attribute for a description of its arguments. +- +-@item version_id +-@cindex @code{version_id} function attribute, IA-64 +-This IA-64 HP-UX attribute, attached to a global variable or function, renames a +-symbol to contain a version string, thus allowing for function level +-versioning. HP-UX system header files may use function level versioning +-for some system calls. +- +-@smallexample +-extern int foo () __attribute__((version_id ("20040821"))); +-@end smallexample +- +-@noindent +-Calls to @var{foo} are mapped to calls to @var{foo@{20040821@}}. +- +-@item visibility ("@var{visibility_type}") +-@cindex @code{visibility} function attribute +-This attribute affects the linkage of the declaration to which it is attached. +-There are four supported @var{visibility_type} values: default, +-hidden, protected or internal visibility. +- +-@smallexample +-void __attribute__ ((visibility ("protected"))) +-f () @{ /* @r{Do something.} */; @} +-int i __attribute__ ((visibility ("hidden"))); +-@end smallexample +- +-The possible values of @var{visibility_type} correspond to the +-visibility settings in the ELF gABI. +- +-@table @dfn +-@c keep this list of visibilities in alphabetical order. +- +-@item default +-Default visibility is the normal case for the object file format. +-This value is available for the visibility attribute to override other +-options that may change the assumed visibility of entities. +- +-On ELF, default visibility means that the declaration is visible to other +-modules and, in shared libraries, means that the declared entity may be +-overridden. +- +-On Darwin, default visibility means that the declaration is visible to +-other modules. 
+- +-Default visibility corresponds to ``external linkage'' in the language. +- +-@item hidden +-Hidden visibility indicates that the entity declared has a new +-form of linkage, which we call ``hidden linkage''. Two +-declarations of an object with hidden linkage refer to the same object +-if they are in the same shared object. +- +-@item internal +-Internal visibility is like hidden visibility, but with additional +-processor specific semantics. Unless otherwise specified by the +-psABI, GCC defines internal visibility to mean that a function is +-@emph{never} called from another module. Compare this with hidden +-functions which, while they cannot be referenced directly by other +-modules, can be referenced indirectly via function pointers. By +-indicating that a function cannot be called from outside the module, +-GCC may for instance omit the load of a PIC register since it is known +-that the calling function loaded the correct value. +- +-@item protected +-Protected visibility is like default visibility except that it +-indicates that references within the defining module bind to the +-definition in that module. That is, the declared entity cannot be +-overridden by another module. +- ++callee has a subset of the target options of the caller. + @end table + +-All visibilities are supported on many, but not all, ELF targets +-(supported when the assembler supports the @samp{.visibility} +-pseudo-op). Default visibility is supported everywhere. Hidden +-visibility is supported on Darwin targets. +- +-The visibility attribute should be applied only to declarations that +-would otherwise have external linkage. The attribute should be applied +-consistently, so that the same entity should not be declared with +-different settings of the attribute. +- +-In C++, the visibility attribute applies to types as well as functions +-and objects, because in C++ types have linkage. A class must not have +-greater visibility than its non-static data member types and bases, +-and class members default to the visibility of their class. Also, a +-declaration without explicit visibility is limited to the visibility +-of its type. ++@node RL78 Function Attributes ++@subsection RL78 Function Attributes + +-In C++, you can mark member functions and static member variables of a +-class with the visibility attribute. This is useful if you know a +-particular method or static member variable should only be used from +-one shared object; then you can mark it hidden while the rest of the +-class has default visibility. Care must be taken to avoid breaking +-the One Definition Rule; for example, it is usually not useful to mark +-an inline method as hidden without marking the whole class as hidden. ++These function attributes are supported by the RL78 back end: + +-A C++ namespace declaration can also have the visibility attribute. ++@table @code ++@item interrupt ++@itemx brk_interrupt ++@cindex @code{interrupt} function attribute, RL78 ++@cindex @code{brk_interrupt} function attribute, RL78 ++These attributes indicate ++that the specified function is an interrupt handler. The compiler generates ++function entry and exit sequences suitable for use in an interrupt handler ++when this attribute is present. 
+ +-@smallexample +-namespace nspace1 __attribute__ ((visibility ("protected"))) +-@{ /* @r{Do something.} */; @} +-@end smallexample ++Use @code{brk_interrupt} instead of @code{interrupt} for ++handlers intended to be used with the @code{BRK} opcode (i.e.@: those ++that must end with @code{RETB} instead of @code{RETI}). + +-This attribute applies only to the particular namespace body, not to +-other definitions of the same namespace; it is equivalent to using +-@samp{#pragma GCC visibility} before and after the namespace +-definition (@pxref{Visibility Pragmas}). ++@item naked ++@cindex @code{naked} function attribute, RL78 ++This attribute allows the compiler to construct the ++requisite function declaration, while allowing the body of the ++function to be assembly code. The specified function will not have ++prologue/epilogue sequences generated by the compiler. Only basic ++@code{asm} statements can safely be included in naked functions ++(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of ++basic @code{asm} and C code may appear to work, they cannot be ++depended upon to work reliably and are not supported. ++@end table + +-In C++, if a template argument has limited visibility, this +-restriction is implicitly propagated to the template instantiation. +-Otherwise, template instantiations and specializations default to the +-visibility of their template. ++@node RX Function Attributes ++@subsection RX Function Attributes + +-If both the template and enclosing class have explicit visibility, the +-visibility from the template is used. ++These function attributes are supported by the RX back end: + +-@item vliw +-@cindex @code{vliw} function attribute, MeP +-On MeP, the @code{vliw} attribute tells the compiler to emit +-instructions in VLIW mode instead of core mode. Note that this +-attribute is not allowed unless a VLIW coprocessor has been configured +-and enabled through command-line options. ++@table @code ++@item fast_interrupt ++@cindex @code{fast_interrupt} function attribute, RX ++Use this attribute on the RX port to indicate that the specified ++function is a fast interrupt handler. This is just like the ++@code{interrupt} attribute, except that @code{freit} is used to return ++instead of @code{reit}. + +-@item warn_unused_result +-@cindex @code{warn_unused_result} function attribute +-The @code{warn_unused_result} attribute causes a warning to be emitted +-if a caller of the function with this attribute does not use its +-return value. This is useful for functions where not checking +-the result is either a security problem or always a bug, such as +-@code{realloc}. ++@item interrupt ++@cindex @code{interrupt} function attribute, RX ++Use this attribute to indicate ++that the specified function is an interrupt handler. The compiler generates ++function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. + +On RX targets, you may specify one or more vector numbers as arguments @@ -3384,14 +3541,23 @@ +multiple entries in multiple tables. One may also pass the magic +string @code{"$default"} which causes the function to be used for any +unfilled slots in the current table. 
-+ + +This example shows a simple assignment of a function to one vector in +the default table (note that preprocessor macros may be used for +chip-specific symbolic vector names): -+@smallexample + @smallexample +-int fn () __attribute__ ((warn_unused_result)); +-int foo () +-@{ +- if (fn () < 0) return -1; +- fn (); +- return 0; +-@} +void __attribute__ ((interrupt (5))) txd1_handler (); -+@end smallexample -+ + @end smallexample + +-@noindent +-results in warning on line 5. +This example assigns a function to two slots in the default table +(using preprocessor macros defined elsewhere) and makes it the default +for the @code{dct} table: @@ -3399,7 +3565,15 @@ +void __attribute__ ((interrupt (RXD1_VECT,RXD2_VECT,"dct","$default"))) + txd1_handler (); +@end smallexample -+ + +-@item weak +-@cindex @code{weak} function attribute +-The @code{weak} attribute causes the declaration to be emitted as a weak +-symbol rather than a global. This is primarily useful in defining +-library functions that can be overridden in user code, though it can +-also be used with non-function declarations. Weak symbols are supported +-for ELF targets, and also for a.out targets when using the GNU assembler +-and linker. +@item naked +@cindex @code{naked} function attribute, RX +This attribute allows the compiler to construct the @@ -3410,7 +3584,17 @@ +(@pxref{Basic Asm}). While using extended @code{asm} or a mixture of +basic @code{asm} and C code may appear to work, they cannot be +depended upon to work reliably and are not supported. -+ + +-@item weakref +-@itemx weakref ("@var{target}") +-@cindex @code{weakref} function attribute +-The @code{weakref} attribute marks a declaration as a weak reference. +-Without arguments, it should be accompanied by an @code{alias} attribute +-naming the target symbol. Optionally, the @var{target} may be given as +-an argument to @code{weakref} itself. In either case, @code{weakref} +-implicitly marks the declaration as @code{weak}. Without a +-@var{target}, given as an argument to @code{weakref} or to @code{alias}, +-@code{weakref} is equivalent to @code{weak}. +@item vector +@cindex @code{vector} function attribute, RX +This RX attribute is similar to the @code{interrupt} attribute, including its @@ -3418,16 +3602,36 @@ +function (i.e. it retains the normal C function calling ABI). See the +@code{interrupt} attribute for a description of its arguments. +@end table -+ + +-@smallexample +-static int x() __attribute__ ((weakref ("y"))); +-/* is equivalent to... */ +-static int x() __attribute__ ((weak, weakref, alias ("y"))); +-/* and to... */ +-static int x() __attribute__ ((weakref)); +-static int x() __attribute__ ((alias ("y"))); +-@end smallexample +@node S/390 Function Attributes +@subsection S/390 Function Attributes -+ -+These function attributes are supported on the S/390: -+ -+@table @code + +-A weak reference is an alias that does not by itself require a +-definition to be given for the target symbol. If the target symbol is +-only referenced through weak references, then it becomes a @code{weak} +-undefined symbol. If it is directly referenced, however, then such +-strong references prevail, and a definition is required for the +-symbol, not necessarily in the same translation unit. 
++These function attributes are supported on the S/390: + +-The effect is equivalent to moving all references to the alias to a +-separate translation unit, renaming the alias to the aliased symbol, +-declaring it as weak, compiling the two separate translation units and +-performing a reloadable link on them. ++@table @code +@item hotpatch (@var{halfwords-before-function-label},@var{halfwords-after-function-label}) +@cindex @code{hotpatch} function attribute, S/390 -+ + +-At present, a declaration to which @code{weakref} is attached can +-only be @code{static}. +On S/390 System z targets, you can use this function attribute to +make GCC generate a ``hot-patching'' function prologue. If the +@option{-mhotpatch=} command-line option is used at the same time, @@ -3436,15 +3640,27 @@ +the function label. A second argument can be used to specify the +number of halfwords to be added after the function label. For +both arguments the maximum allowed value is 1000000. -+ + +If both arguments are zero, hotpatching is disabled. -+@end table -+ + @end table + +-You can specify multiple attributes in a declaration by separating them +-by commas within the double parentheses or by immediately following an +-attribute declaration with another attribute declaration. +@node SH Function Attributes +@subsection SH Function Attributes -+ + +-@cindex @code{#pragma}, reason for not using +-@cindex pragma, reason for not using +-Some people object to the @code{__attribute__} feature, suggesting that +-ISO C's @code{#pragma} should be used instead. At the time +-@code{__attribute__} was designed, there were two reasons for not doing +-this. +These function attributes are supported on the SH family of processors: -+ + +-@enumerate +-@item +-It is impossible to generate @code{#pragma} commands from a macro. +@table @code +@item function_vector +@cindex @code{function_vector} function attribute, SH @@ -3459,38 +3675,62 @@ +entries. The jumps to these functions are generated using a SH2A specific, +non delayed branch instruction JSR/N @@(disp8,TBR). You must use GAS and GLD +from GNU binutils version 2.7 or later for this attribute to work correctly. -+ + +-@item +-There is no telling what the same @code{#pragma} might mean in another +-compiler. +-@end enumerate +In an application, for a function being called once, this attribute +saves at least 8 bytes of code; and if other successive calls are being +made to the same function, it saves 2 bytes of code per each of these +calls. -+ + +-These two reasons applied to almost any application that might have been +-proposed for @code{#pragma}. It was basically a mistake to use +-@code{#pragma} for @emph{anything}. +@item interrupt_handler +@cindex @code{interrupt_handler} function attribute, SH +Use this attribute to +indicate that the specified function is an interrupt handler. The compiler +generates function entry and exit sequences suitable for use in an +interrupt handler when this attribute is present. -+ + +-The ISO C99 standard includes @code{_Pragma}, which now allows pragmas +-to be generated from macros. In addition, a @code{#pragma GCC} +-namespace is now in use for GCC-specific pragmas. However, it has been +-found convenient to use @code{__attribute__} to achieve a natural +-attachment of attributes to their corresponding declarations, whereas +-@code{#pragma GCC} is of use for constructs that do not naturally form +-part of the grammar. @xref{Pragmas,,Pragmas Accepted by GCC}. 
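+
+A minimal sketch (the handler name is hypothetical):
+
+@smallexample
+/* Entry and exit sequences suitable for an SH interrupt handler
+   are generated for this function.  */
+void rx_ready_isr (void) __attribute__ ((interrupt_handler));
+@end smallexample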
+@item nosave_low_regs +@cindex @code{nosave_low_regs} function attribute, SH +Use this attribute on SH targets to indicate that an @code{interrupt_handler} +function should not save and restore registers R0..R7. This can be used on SH3* +and SH4* targets that have a second R0..R7 register bank for non-reentrant +interrupt handlers. -+ + +-@node Label Attributes +-@section Label Attributes +-@cindex Label Attributes +@item renesas +@cindex @code{renesas} function attribute, SH +On SH targets this attribute specifies that the function or struct follows the +Renesas ABI. -+ + +-GCC allows attributes to be set on C labels. @xref{Attribute Syntax}, for +-details of the exact syntax for using attributes. Other attributes are +-available for functions (@pxref{Function Attributes}), variables +-(@pxref{Variable Attributes}) and for types (@pxref{Type Attributes}). +@item resbank +@cindex @code{resbank} function attribute, SH +On the SH2A target, this attribute enables the high-speed register +saving and restoration using a register bank for @code{interrupt_handler} +routines. Saving to the bank is performed automatically after the CPU +accepts an interrupt that uses a register bank. -+ + +-This example uses the @code{cold} label attribute to indicate the +-@code{ErrorHandling} branch is unlikely to be taken and that the +-@code{ErrorHandling} label is unused: +The nineteen 32-bit registers comprising general register R0 to R14, +control register GBR, and system registers MACH, MACL, and PR and the +vector table address offset are saved into a register bank. Register @@ -3503,34 +3743,54 @@ +function should switch to an alternate stack. It expects a string +argument that names a global variable holding the address of the +alternate stack. -+ -+@smallexample + + @smallexample +void *alt_stack; +void f () __attribute__ ((interrupt_handler, + sp_switch ("alt_stack"))); +@end smallexample - @item trap_exit - @cindex @code{trap_exit} function attribute, SH -@@ -4329,256 +4662,341 @@ Use this attribute on the SH for an @code{interrupt_handler} to return using - @code{trapa} instead of @code{rte}. This attribute expects an integer - argument specifying the trap number to be used. - --@item trapa_handler --@cindex @code{trapa_handler} function attribute, SH --On SH targets this function attribute is similar to @code{interrupt_handler} --but it does not save and restore all registers. +- asm goto ("some asm" : : : : NoError); ++@item trap_exit ++@cindex @code{trap_exit} function attribute, SH ++Use this attribute on the SH for an @code{interrupt_handler} to return using ++@code{trapa} instead of @code{rte}. This attribute expects an integer ++argument specifying the trap number to be used. + +-/* This branch (the fall-through from the asm) is less commonly used */ +-ErrorHandling: +- __attribute__((cold, unused)); /* Semi-colon is required here */ +- printf("error\n"); +- return 0; +@item trapa_handler +@cindex @code{trapa_handler} function attribute, SH +On SH targets this function attribute is similar to @code{interrupt_handler} +but it does not save and restore all registers. +@end table -+ + +-NoError: +- printf("no error\n"); +- return 1; +-@end smallexample +@node SPU Function Attributes +@subsection SPU Function Attributes -+ + +-@table @code +-@item unused +-@cindex @code{unused} label attribute +-This feature is intended for program-generated code that may contain +-unused labels, but which is compiled with @option{-Wall}. 
It is +-not normally appropriate to use in it human-written code, though it +-could be useful in cases where the code that jumps to the label is +-contained within an @code{#ifdef} conditional. +These function attributes are supported by the SPU back end: -+ + +-@item hot +-@cindex @code{hot} label attribute +-The @code{hot} attribute on a label is used to inform the compiler that +-the path following the label is more likely than paths that are not so +-annotated. This attribute is used in cases where @code{__builtin_expect} +-cannot be used, for instance with computed goto or @code{asm goto}. +@table @code +@item naked +@cindex @code{naked} function attribute, SPU @@ -3543,18 +3803,40 @@ +basic @code{asm} and C code may appear to work, they cannot be +depended upon to work reliably and are not supported. +@end table -+ + +-@item cold +-@cindex @code{cold} label attribute +-The @code{cold} attribute on labels is used to inform the compiler that +-the path following the label is unlikely to be executed. This attribute +-is used in cases where @code{__builtin_expect} cannot be used, for instance +-with computed goto or @code{asm goto}. +@node Symbian OS Function Attributes +@subsection Symbian OS Function Attributes -+ + +-@end table +@xref{Microsoft Windows Function Attributes}, for discussion of the +@code{dllexport} and @code{dllimport} attributes. -+ + +-@node Attribute Syntax +-@section Attribute Syntax +-@cindex attribute syntax +@node Visium Function Attributes +@subsection Visium Function Attributes -+ + +-This section describes the syntax with which @code{__attribute__} may be +-used, and the constructs to which attribute specifiers bind, for the C +-language. Some details may vary for C++ and Objective-C@. Because of +-infelicities in the grammar for attributes, some forms described here +-may not be successfully parsed in all cases. +These function attributes are supported by the Visium back end: -+ + +-There are some problems with the semantics of attributes in C++. For +-example, there are no manglings for attributes, although they may affect +-code generation, so problems may arise when attributed types are used in +-conjunction with templates or overloading. Similarly, @code{typeid} +-does not distinguish between types with different attributes. Support +-for attributes in C++ may be restricted in future to attributes on +-declarations only, but not on nested declarators. +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, Visium @@ -3563,12 +3845,26 @@ +function entry and exit sequences suitable for use in an interrupt handler +when this attribute is present. +@end table -+ + +-@xref{Function Attributes}, for details of the semantics of attributes +-applying to functions. @xref{Variable Attributes}, for details of the +-semantics of attributes applying to variables. @xref{Type Attributes}, +-for details of the semantics of attributes applying to structure, union +-and enumerated types. +-@xref{Label Attributes}, for details of the semantics of attributes +-applying to labels. +@node x86 Function Attributes +@subsection x86 Function Attributes -+ + +-An @dfn{attribute specifier} is of the form +-@code{__attribute__ ((@var{attribute-list}))}. An @dfn{attribute list} +-is a possibly empty comma-separated sequence of @dfn{attributes}, where +-each attribute is one of the following: +These function attributes are supported by the x86 back end: -+ + +-@itemize @bullet +-@item +-Empty. Empty attributes are ignored. 
+@table @code +@item cdecl +@cindex @code{cdecl} function attribute, x86-32 @@ -3578,7 +3874,10 @@ +assume that the calling function pops off the stack space used to +pass arguments. This is +useful to override the effects of the @option{-mrtd} switch. -+ + +-@item +-A word (which may be an identifier such as @code{unused}, or a reserved +-word such as @code{const}). +@item fastcall +@cindex @code{fastcall} function attribute, x86-32 +@cindex functions that pop the argument stack on x86-32 @@ -3588,7 +3887,10 @@ +and other typed arguments are passed on the stack. The called function +pops the arguments off the stack. If the number of arguments is variable all +arguments are pushed on the stack. -+ + +-@item +-A word, followed by, in parentheses, parameters for the attribute. +-These parameters take one of the following forms: +@item thiscall +@cindex @code{thiscall} function attribute, x86-32 +@cindex functions that pop the argument stack on x86-32 @@ -3601,17 +3903,18 @@ +The @code{thiscall} attribute is intended for C++ non-static member functions. +As a GCC extension, this calling convention can be used for C functions +and for static member methods. -+ + +-@itemize @bullet +-@item +-An identifier. For example, @code{mode} attributes use this form. +@item ms_abi +@itemx sysv_abi +@cindex @code{ms_abi} function attribute, x86 +@cindex @code{sysv_abi} function attribute, x86 --@item unused --@cindex @code{unused} function attribute --This attribute, attached to a function, means that the function is meant --to be possibly unused. GCC does not produce a warning for this --function. +-@item +-An identifier followed by a comma and a non-empty comma-separated list +-of expressions. For example, @code{format} attributes use this form. +On 32-bit and 64-bit x86 targets, you can use an ABI attribute +to indicate which calling convention should be used for a function. The +@code{ms_abi} attribute tells the compiler to use the Microsoft ABI, @@ -3619,62 +3922,63 @@ +used on GNU/Linux and other systems. The default is to use the Microsoft ABI +when targeting Windows. On all other systems, the default is the x86/AMD ABI. --@item used --@cindex @code{used} function attribute --This attribute, attached to a function, means that code must be emitted --for the function even if it appears that the function is not referenced. --This is useful, for example, when the function is referenced only in --inline assembly. +-@item +-A possibly empty comma-separated list of expressions. For example, +-@code{format_arg} attributes use this form with the list being a single +-integer constant expression, and @code{alias} attributes use this form +-with the list being a single string constant. +-@end itemize +-@end itemize +Note, the @code{ms_abi} attribute for Microsoft Windows 64-bit targets currently +requires the @option{-maccumulate-outgoing-args} option. --When applied to a member function of a C++ class template, the --attribute also means that the function is instantiated if the --class itself is instantiated. +-An @dfn{attribute specifier list} is a sequence of one or more attribute +-specifiers, not separated by any other tokens. +@item callee_pop_aggregate_return (@var{number}) +@cindex @code{callee_pop_aggregate_return} function attribute, x86 --@item vector --@cindex @code{vector} function attribute, RX --This RX attribute is similar to the @code{interrupt} attribute, including its --parameters, but does not make the function an interrupt-handler type --function (i.e. 
it retains the normal C function calling ABI). See the --@code{interrupt} attribute for a description of its arguments. +-@subsubheading Label Attributes +On x86-32 targets, you can use this attribute to control how +aggregates are returned in memory. If the caller is responsible for +popping the hidden pointer together with the rest of the arguments, specify +@var{number} equal to zero. If callee is responsible for popping the +hidden pointer, specify @var{number} equal to one. --@item version_id --@cindex @code{version_id} function attribute, IA-64 --This IA-64 HP-UX attribute, attached to a global variable or function, renames a --symbol to contain a version string, thus allowing for function level --versioning. HP-UX system header files may use function level versioning --for some system calls. +-In GNU C, an attribute specifier list may appear after the colon following a +-label, other than a @code{case} or @code{default} label. GNU C++ only permits +-attributes on labels if the attribute specifier is immediately +-followed by a semicolon (i.e., the label applies to an empty +-statement). If the semicolon is missing, C++ label attributes are +-ambiguous, as it is permissible for a declaration, which could begin +-with an attribute list, to be labelled in C++. Declarations cannot be +-labelled in C90 or C99, so the ambiguity does not arise there. +The default x86-32 ABI assumes that the callee pops the +stack for hidden pointer. However, on x86-32 Microsoft Windows targets, +the compiler assumes that the +caller pops the stack for hidden pointer. --@smallexample --extern int foo () __attribute__((version_id ("20040821"))); --@end smallexample +-@subsubheading Type Attributes +@item ms_hook_prologue +@cindex @code{ms_hook_prologue} function attribute, x86 --@noindent --Calls to @var{foo} are mapped to calls to @var{foo@{20040821@}}. +-An attribute specifier list may appear as part of a @code{struct}, +-@code{union} or @code{enum} specifier. It may go either immediately +-after the @code{struct}, @code{union} or @code{enum} keyword, or after +-the closing brace. The former syntax is preferred. +-Where attribute specifiers follow the closing brace, they are considered +-to relate to the structure, union or enumerated type defined, not to any +-enclosing declaration the type specifier appears in, and the type +-defined is not complete until after the attribute specifiers. +-@c Otherwise, there would be the following problems: a shift/reduce +-@c conflict between attributes binding the struct/union/enum and +-@c binding to the list of specifiers/qualifiers; and "aligned" +-@c attributes could use sizeof for the structure, but the size could be +-@c changed later by "packed" attributes. +On 32-bit and 64-bit x86 targets, you can use +this function attribute to make GCC generate the ``hot-patching'' function +prologue used in Win32 API functions in Microsoft Windows XP Service Pack 2 +and newer. --@item visibility ("@var{visibility_type}") --@cindex @code{visibility} function attribute --This attribute affects the linkage of the declaration to which it is attached. --There are four supported @var{visibility_type} values: default, --hidden, protected or internal visibility. +@item regparm (@var{number}) +@cindex @code{regparm} function attribute, x86 +@cindex functions that are passed arguments in registers on x86-32 @@ -3684,11 +3988,7 @@ +take a variable number of arguments continue to be passed all of their +arguments on the stack. 
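+
+A short sketch (the function name is hypothetical):
+
+@smallexample
+/* The two integer arguments are passed in EAX and EDX instead of
+   on the stack.  */
+int __attribute__ ((regparm (2))) add2 (int a, int b);
+@end smallexample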
--@smallexample --void __attribute__ ((visibility ("protected"))) --f () @{ /* @r{Do something.} */; @} --int i __attribute__ ((visibility ("hidden"))); --@end smallexample +-@subsubheading All other attributes +Beware that on some ELF systems this attribute is unsuitable for +global functions in shared libraries with lazy binding (which is the +default). Lazy binding sends the first call via resolving code in @@ -3700,8 +4000,15 @@ +disabled with the linker or the loader if desired, to avoid the +problem.) --The possible values of @var{visibility_type} correspond to the --visibility settings in the ELF gABI. +-Otherwise, an attribute specifier appears as part of a declaration, +-counting declarations of unnamed parameters and type names, and relates +-to that declaration (which may be nested in another declaration, for +-example in the case of a parameter declaration), or to a particular declarator +-within a declaration. Where an +-attribute specifier is applied to a parameter declared as a function or +-an array, it should apply to the function or array rather than the +-pointer to which the parameter is implicitly converted, but this is not +-yet correctly implemented. +@item sseregparm +@cindex @code{sseregparm} function attribute, x86 +On x86-32 targets with SSE support, the @code{sseregparm} attribute @@ -3710,8 +4017,22 @@ +variable number of arguments continue to pass all of their +floating-point arguments on the stack. --@table @dfn --@c keep this list of visibilities in alphabetical order. +-Any list of specifiers and qualifiers at the start of a declaration may +-contain attribute specifiers, whether or not such a list may in that +-context contain storage class specifiers. (Some attributes, however, +-are essentially in the nature of storage class specifiers, and only make +-sense where storage class specifiers may be used; for example, +-@code{section}.) There is one necessary limitation to this syntax: the +-first old-style parameter declaration in a function definition cannot +-begin with an attribute specifier, because such an attribute applies to +-the function instead by syntax described below (which, however, is not +-yet implemented in this case). In some other cases, attribute +-specifiers are permitted by this grammar but not yet supported by the +-compiler. All attribute specifiers in this place relate to the +-declaration as a whole. In the obsolescent usage where a type of +-@code{int} is implied by the absence of type specifiers, such a list of +-specifiers and qualifiers may be an attribute specifier list with no +-other specifiers or qualifiers. +@item force_align_arg_pointer +@cindex @code{force_align_arg_pointer} function attribute, x86 +On x86 targets, the @code{force_align_arg_pointer} attribute may be @@ -3720,10 +4041,14 @@ +This supports mixing legacy codes that run with a 4-byte aligned stack +with modern codes that keep a 16-byte stack for SSE compatibility. --@item default --Default visibility is the normal case for the object file format. --This value is available for the visibility attribute to override other --options that may change the assumed visibility of entities. +-At present, the first parameter in a function prototype must have some +-type specifier that is not an attribute specifier; this resolves an +-ambiguity in the interpretation of @code{void f(int +-(__attribute__((foo)) x))}, but is subject to change. 
At present, if +-the parentheses of a function declarator contain only attributes then +-those attributes are ignored, rather than yielding an error or warning +-or implying a single parameter of type int, but this is subject to +-change. +@item stdcall +@cindex @code{stdcall} function attribute, x86-32 +@cindex functions that pop the argument stack on x86-32 @@ -3731,16 +4056,22 @@ +assume that the called function pops off the stack space used to +pass arguments, unless it takes a variable number of arguments. --On ELF, default visibility means that the declaration is visible to other --modules and, in shared libraries, means that the declared entity may be --overridden. +-An attribute specifier list may appear immediately before a declarator +-(other than the first) in a comma-separated list of declarators in a +-declaration of more than one identifier using a single list of +-specifiers and qualifiers. Such attribute specifiers apply +-only to the identifier before whose declarator they appear. For +-example, in +@item target (@var{options}) +@cindex @code{target} function attribute +As discussed in @ref{Common Function Attributes}, this attribute +allows specification of target-specific compilation options. --On Darwin, default visibility means that the declaration is visible to --other modules. +-@smallexample +-__attribute__((noreturn)) void d0 (void), +- __attribute__((format(printf, 1, 2))) d1 (const char *, ...), +- d2 (void); +-@end smallexample +On the x86, the following options are allowed: +@table @samp +@item abm @@ -3748,244 +4079,250 @@ +@cindex @code{target("abm")} function attribute, x86 +Enable/disable the generation of the advanced bit instructions. --Default visibility corresponds to ``external linkage'' in the language. +-@noindent +-the @code{noreturn} attribute applies to all the functions +-declared; the @code{format} attribute only applies to @code{d1}. +@item aes +@itemx no-aes +@cindex @code{target("aes")} function attribute, x86 +Enable/disable the generation of the AES instructions. --@item hidden --Hidden visibility indicates that the entity declared has a new --form of linkage, which we call ``hidden linkage''. Two --declarations of an object with hidden linkage refer to the same object --if they are in the same shared object. +-An attribute specifier list may appear immediately before the comma, +-@code{=} or semicolon terminating the declaration of an identifier other +-than a function definition. Such attribute specifiers apply +-to the declared object or function. Where an +-assembler name for an object or function is specified (@pxref{Asm +-Labels}), the attribute must follow the @code{asm} +-specification. +@item default +@cindex @code{target("default")} function attribute, x86 +@xref{Function Multiversioning}, where it is used to specify the +default function version. --@item internal --Internal visibility is like hidden visibility, but with additional --processor specific semantics. Unless otherwise specified by the --psABI, GCC defines internal visibility to mean that a function is --@emph{never} called from another module. Compare this with hidden --functions which, while they cannot be referenced directly by other --modules, can be referenced indirectly via function pointers. By --indicating that a function cannot be called from outside the module, --GCC may for instance omit the load of a PIC register since it is known --that the calling function loaded the correct value. 
+-An attribute specifier list may, in future, be permitted to appear after +-the declarator in a function definition (before any old-style parameter +-declarations or the function body). +@item mmx +@itemx no-mmx +@cindex @code{target("mmx")} function attribute, x86 +Enable/disable the generation of the MMX instructions. --@item protected --Protected visibility is like default visibility except that it --indicates that references within the defining module bind to the --definition in that module. That is, the declared entity cannot be --overridden by another module. +-Attribute specifiers may be mixed with type qualifiers appearing inside +-the @code{[]} of a parameter array declarator, in the C99 construct by +-which such qualifiers are applied to the pointer to which the array is +-implicitly converted. Such attribute specifiers apply to the pointer, +-not to the array, but at present this is not implemented and they are +-ignored. +@item pclmul +@itemx no-pclmul +@cindex @code{target("pclmul")} function attribute, x86 +Enable/disable the generation of the PCLMUL instructions. --@end table +-An attribute specifier list may appear at the start of a nested +-declarator. At present, there are some limitations in this usage: the +-attributes correctly apply to the declarator, but for most individual +-attributes the semantics this implies are not implemented. +-When attribute specifiers follow the @code{*} of a pointer +-declarator, they may be mixed with any type qualifiers present. +-The following describes the formal semantics of this syntax. It makes the +-most sense if you are familiar with the formal specification of +-declarators in the ISO C standard. +@item popcnt +@itemx no-popcnt +@cindex @code{target("popcnt")} function attribute, x86 +Enable/disable the generation of the POPCNT instruction. --All visibilities are supported on many, but not all, ELF targets --(supported when the assembler supports the @samp{.visibility} --pseudo-op). Default visibility is supported everywhere. Hidden --visibility is supported on Darwin targets. +-Consider (as in C99 subclause 6.7.5 paragraph 4) a declaration @code{T +-D1}, where @code{T} contains declaration specifiers that specify a type +-@var{Type} (such as @code{int}) and @code{D1} is a declarator that +-contains an identifier @var{ident}. The type specified for @var{ident} +-for derived declarators whose type does not include an attribute +-specifier is as in the ISO C standard. +@item sse +@itemx no-sse +@cindex @code{target("sse")} function attribute, x86 +Enable/disable the generation of the SSE instructions. --The visibility attribute should be applied only to declarations that --would otherwise have external linkage. The attribute should be applied --consistently, so that the same entity should not be declared with --different settings of the attribute. +-If @code{D1} has the form @code{( @var{attribute-specifier-list} D )}, +-and the declaration @code{T D} specifies the type +-``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then +-@code{T D1} specifies the type ``@var{derived-declarator-type-list} +-@var{attribute-specifier-list} @var{Type}'' for @var{ident}. +@item sse2 +@itemx no-sse2 +@cindex @code{target("sse2")} function attribute, x86 +Enable/disable the generation of the SSE2 instructions. --In C++, the visibility attribute applies to types as well as functions --and objects, because in C++ types have linkage. 
A class must not have --greater visibility than its non-static data member types and bases, --and class members default to the visibility of their class. Also, a --declaration without explicit visibility is limited to the visibility --of its type. +-If @code{D1} has the form @code{* +-@var{type-qualifier-and-attribute-specifier-list} D}, and the +-declaration @code{T D} specifies the type +-``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then +-@code{T D1} specifies the type ``@var{derived-declarator-type-list} +-@var{type-qualifier-and-attribute-specifier-list} pointer to @var{Type}'' for +-@var{ident}. +@item sse3 +@itemx no-sse3 +@cindex @code{target("sse3")} function attribute, x86 +Enable/disable the generation of the SSE3 instructions. --In C++, you can mark member functions and static member variables of a --class with the visibility attribute. This is useful if you know a --particular method or static member variable should only be used from --one shared object; then you can mark it hidden while the rest of the --class has default visibility. Care must be taken to avoid breaking --the One Definition Rule; for example, it is usually not useful to mark --an inline method as hidden without marking the whole class as hidden. +-For example, +@item sse4 +@itemx no-sse4 +@cindex @code{target("sse4")} function attribute, x86 +Enable/disable the generation of the SSE4 instructions (both SSE4.1 +and SSE4.2). --A C++ namespace declaration can also have the visibility attribute. +-@smallexample +-void (__attribute__((noreturn)) ****f) (void); +-@end smallexample +@item sse4.1 +@itemx no-sse4.1 +@cindex @code{target("sse4.1")} function attribute, x86 +Enable/disable the generation of the sse4.1 instructions. --@smallexample --namespace nspace1 __attribute__ ((visibility ("protected"))) --@{ /* @r{Do something.} */; @} --@end smallexample +-@noindent +-specifies the type ``pointer to pointer to pointer to pointer to +-non-returning function returning @code{void}''. As another example, +@item sse4.2 +@itemx no-sse4.2 +@cindex @code{target("sse4.2")} function attribute, x86 +Enable/disable the generation of the sse4.2 instructions. --This attribute applies only to the particular namespace body, not to --other definitions of the same namespace; it is equivalent to using --@samp{#pragma GCC visibility} before and after the namespace --definition (@pxref{Visibility Pragmas}). +-@smallexample +-char *__attribute__((aligned(8))) *f; +-@end smallexample +@item sse4a +@itemx no-sse4a +@cindex @code{target("sse4a")} function attribute, x86 +Enable/disable the generation of the SSE4A instructions. --In C++, if a template argument has limited visibility, this --restriction is implicitly propagated to the template instantiation. --Otherwise, template instantiations and specializations default to the --visibility of their template. +-@noindent +-specifies the type ``pointer to 8-byte-aligned pointer to @code{char}''. +-Note again that this does not work with most attributes; for example, +-the usage of @samp{aligned} and @samp{noreturn} attributes given above +-is not yet supported. +@item fma4 +@itemx no-fma4 +@cindex @code{target("fma4")} function attribute, x86 +Enable/disable the generation of the FMA4 instructions. --If both the template and enclosing class have explicit visibility, the --visibility from the template is used. 
+-For compatibility with existing code written for compiler versions that +-did not implement attributes on nested declarators, some laxity is +-allowed in the placing of attributes. If an attribute that only applies +-to types is applied to a declaration, it is treated as applying to +-the type of that declaration. If an attribute that only applies to +-declarations is applied to the type of a declaration, it is treated +-as applying to that declaration; and, for compatibility with code +-placing the attributes immediately before the identifier declared, such +-an attribute applied to a function return type is treated as +-applying to the function type, and such an attribute applied to an array +-element type is treated as applying to the array type. If an +-attribute that only applies to function types is applied to a +-pointer-to-function type, it is treated as applying to the pointer +-target type; if such an attribute is applied to a function return type +-that is not a pointer-to-function type, it is treated as applying +-to the function type. +@item xop +@itemx no-xop +@cindex @code{target("xop")} function attribute, x86 +Enable/disable the generation of the XOP instructions. --@item vliw --@cindex @code{vliw} function attribute, MeP --On MeP, the @code{vliw} attribute tells the compiler to emit --instructions in VLIW mode instead of core mode. Note that this --attribute is not allowed unless a VLIW coprocessor has been configured --and enabled through command-line options. +-@node Function Prototypes +-@section Prototypes and Old-Style Function Definitions +-@cindex function prototype declarations +-@cindex old-style function definitions +-@cindex promotion of formal parameters +@item lwp +@itemx no-lwp +@cindex @code{target("lwp")} function attribute, x86 +Enable/disable the generation of the LWP instructions. --@item warn_unused_result --@cindex @code{warn_unused_result} function attribute --The @code{warn_unused_result} attribute causes a warning to be emitted --if a caller of the function with this attribute does not use its --return value. This is useful for functions where not checking --the result is either a security problem or always a bug, such as --@code{realloc}. +-GNU C extends ISO C to allow a function prototype to override a later +-old-style non-prototype definition. Consider the following example: +@item ssse3 +@itemx no-ssse3 +@cindex @code{target("ssse3")} function attribute, x86 +Enable/disable the generation of the SSSE3 instructions. -@smallexample --int fn () __attribute__ ((warn_unused_result)); --int foo () --@{ -- if (fn () < 0) return -1; -- fn (); -- return 0; --@} --@end smallexample +-/* @r{Use prototypes unless the compiler is old-fashioned.} */ +-#ifdef __STDC__ +-#define P(x) x +-#else +-#define P(x) () +-#endif +@item cld +@itemx no-cld +@cindex @code{target("cld")} function attribute, x86 +Enable/disable the generation of the CLD before string moves. --@noindent --results in warning on line 5. +-/* @r{Prototype function declaration.} */ +-int isroot P((uid_t)); +@item fancy-math-387 +@itemx no-fancy-math-387 +@cindex @code{target("fancy-math-387")} function attribute, x86 +Enable/disable the generation of the @code{sin}, @code{cos}, and +@code{sqrt} instructions on the 387 floating-point unit. --@item weak --@cindex @code{weak} function attribute --The @code{weak} attribute causes the declaration to be emitted as a weak --symbol rather than a global. 
This is primarily useful in defining --library functions that can be overridden in user code, though it can --also be used with non-function declarations. Weak symbols are supported --for ELF targets, and also for a.out targets when using the GNU assembler --and linker. +-/* @r{Old-style function definition.} */ +-int +-isroot (x) /* @r{??? lossage here ???} */ +- uid_t x; +-@{ +- return x == 0; +-@} +-@end smallexample +@item fused-madd +@itemx no-fused-madd +@cindex @code{target("fused-madd")} function attribute, x86 +Enable/disable the generation of the fused multiply/add instructions. --@item weakref --@itemx weakref ("@var{target}") --@cindex @code{weakref} function attribute --The @code{weakref} attribute marks a declaration as a weak reference. --Without arguments, it should be accompanied by an @code{alias} attribute --naming the target symbol. Optionally, the @var{target} may be given as --an argument to @code{weakref} itself. In either case, @code{weakref} --implicitly marks the declaration as @code{weak}. Without a --@var{target}, given as an argument to @code{weakref} or to @code{alias}, --@code{weakref} is equivalent to @code{weak}. +-Suppose the type @code{uid_t} happens to be @code{short}. ISO C does +-not allow this example, because subword arguments in old-style +-non-prototype definitions are promoted. Therefore in this example the +-function definition's argument is really an @code{int}, which does not +-match the prototype argument type of @code{short}. +@item ieee-fp +@itemx no-ieee-fp +@cindex @code{target("ieee-fp")} function attribute, x86 +Enable/disable the generation of floating point that depends on IEEE arithmetic. --@smallexample --static int x() __attribute__ ((weakref ("y"))); --/* is equivalent to... */ --static int x() __attribute__ ((weak, weakref, alias ("y"))); --/* and to... */ --static int x() __attribute__ ((weakref)); --static int x() __attribute__ ((alias ("y"))); --@end smallexample +-This restriction of ISO C makes it hard to write code that is portable +-to traditional C compilers, because the programmer does not know +-whether the @code{uid_t} type is @code{short}, @code{int}, or +-@code{long}. Therefore, in cases like these GNU C allows a prototype +-to override a later old-style definition. More precisely, in GNU C, a +-function prototype argument type overrides the argument type specified +-by a later old-style definition if the former type is the same as the +-latter type before promotion. Thus in GNU C the above example is +-equivalent to the following: +@item inline-all-stringops +@itemx no-inline-all-stringops +@cindex @code{target("inline-all-stringops")} function attribute, x86 +Enable/disable inlining of string operations. --A weak reference is an alias that does not by itself require a --definition to be given for the target symbol. If the target symbol is --only referenced through weak references, then it becomes a @code{weak} --undefined symbol. If it is directly referenced, however, then such --strong references prevail, and a definition is required for the --symbol, not necessarily in the same translation unit. +-@smallexample +-int isroot (uid_t); +@item inline-stringops-dynamically +@itemx no-inline-stringops-dynamically +@cindex @code{target("inline-stringops-dynamically")} function attribute, x86 +Enable/disable the generation of the inline code to do small string +operations and calling the library routines for large operations. 
--The effect is equivalent to moving all references to the alias to a --separate translation unit, renaming the alias to the aliased symbol, --declaring it as weak, compiling the two separate translation units and --performing a reloadable link on them. +-int +-isroot (uid_t x) +-@{ +- return x == 0; +-@} +-@end smallexample +@item align-stringops +@itemx no-align-stringops +@cindex @code{target("align-stringops")} function attribute, x86 +Do/do not align destination of inlined string operations. --At present, a declaration to which @code{weakref} is attached can --only be @code{static}. +-@noindent +-GNU C++ does not support old-style function definitions, so this +-extension is irrelevant. +@item recip +@itemx no-recip +@cindex @code{target("recip")} function attribute, x86 @@ -3993,24 +4330,30 @@ +instructions followed an additional Newton-Raphson step instead of +doing a floating-point division. --@end table +-@node C++ Comments +-@section C++ Style Comments +-@cindex @code{//} +-@cindex C++ comments +-@cindex comments, C++ style +@item arch=@var{ARCH} +@cindex @code{target("arch=@var{ARCH}")} function attribute, x86 +Specify the architecture to generate code for in compiling the function. --You can specify multiple attributes in a declaration by separating them --by commas within the double parentheses or by immediately following an --attribute declaration with another attribute declaration. +-In GNU C, you may use C++ style comments, which start with @samp{//} and +-continue until the end of the line. Many other C implementations allow +-such comments, and they are included in the 1999 C standard. However, +-C++ style comments are not recognized if you specify an @option{-std} +-option specifying a version of ISO C before C99, or @option{-ansi} +-(equivalent to @option{-std=c90}). +@item tune=@var{TUNE} +@cindex @code{target("tune=@var{TUNE}")} function attribute, x86 +Specify the architecture to tune for in compiling the function. --@cindex @code{#pragma}, reason for not using --@cindex pragma, reason for not using --Some people object to the @code{__attribute__} feature, suggesting that --ISO C's @code{#pragma} should be used instead. At the time --@code{__attribute__} was designed, there were two reasons for not doing --this. +-@node Dollar Signs +-@section Dollar Signs in Identifier Names +-@cindex $ +-@cindex dollar signs in identifier names +-@cindex identifier names, dollar signs in +@item fpmath=@var{FPMATH} +@cindex @code{target("fpmath=@var{FPMATH}")} function attribute, x86 +Specify which floating-point unit to use. You must specify the @@ -4019,9 +4362,10 @@ +different options. +@end table --@enumerate --@item --It is impossible to generate @code{#pragma} commands from a macro. +-In GNU C, you may normally use dollar signs in identifier names. +-This is because many traditional C implementations allow such identifiers. +-However, dollar signs in identifiers are not supported on a few target +-machines, typically because the target assembler does not allow them. +On the x86, the inliner does not inline a +function that has different target options than the caller, unless the +callee has a subset of the target options of the caller. For example @@ -4029,25 +4373,15 @@ +with @code{target("sse2")}, since @code{-msse3} implies @code{-msse2}. +@end table --@item --There is no telling what the same @code{#pragma} might mean in another --compiler. 
--@end enumerate +-@node Character Escapes +-@section The Character @key{ESC} in Constants +@node Xstormy16 Function Attributes +@subsection Xstormy16 Function Attributes --These two reasons applied to almost any application that might have been --proposed for @code{#pragma}. It was basically a mistake to use --@code{#pragma} for @emph{anything}. +-You can use the sequence @samp{\e} in a string or character constant to +-stand for the ASCII character @key{ESC}. +These function attributes are supported by the Xstormy16 back end: - --The ISO C99 standard includes @code{_Pragma}, which now allows pragmas --to be generated from macros. In addition, a @code{#pragma GCC} --namespace is now in use for GCC-specific pragmas. However, it has been --found convenient to use @code{__attribute__} to achieve a natural --attachment of attributes to their corresponding declarations, whereas --@code{#pragma GCC} is of use for constructs that do not naturally form --part of the grammar. @xref{Pragmas,,Pragmas Accepted by GCC}. ++ +@table @code +@item interrupt +@cindex @code{interrupt} function attribute, Xstormy16 @@ -4057,39 +4391,9 @@ +when this attribute is present. +@end table - @node Label Attributes - @section Label Attributes -@@ -4669,11 +5087,13 @@ each attribute is one of the following: - Empty. Empty attributes are ignored. - - @item --A word (which may be an identifier such as @code{unused}, or a reserved -+An attribute name -+(which may be an identifier such as @code{unused}, or a reserved - word such as @code{const}). - - @item --A word, followed by, in parentheses, parameters for the attribute. -+An attribute name followed by a parenthesized list of -+parameters for the attribute. - These parameters take one of the following forms: - - @itemize @bullet -@@ -4695,6 +5115,13 @@ with the list being a single string constant. - An @dfn{attribute specifier list} is a sequence of one or more attribute - specifiers, not separated by any other tokens. - -+You may optionally specify attribute names with @samp{__} -+preceding and following the name. -+This allows you to use them in header files without -+being concerned about a possible macro of the same name. For example, -+you may use the attribute name @code{__noreturn__} instead of @code{noreturn}. -+ -+ - @subsubheading Label Attributes - - In GNU C, an attribute specifier list may appear after the colon following a -@@ -4970,14 +5397,29 @@ types (@pxref{Type Attributes}). + @node Variable Attributes + @section Specifying Attributes of Variables +@@ -4970,14 +5036,29 @@ types (@pxref{Type Attributes}). Other front ends might define more attributes (@pxref{C++ Extensions,,Extensions to the C++ Language}). @@ -4124,7 +4428,7 @@ @table @code @cindex @code{aligned} variable attribute @item aligned (@var{alignment}) -@@ -5046,7 +5488,7 @@ in an @code{__attribute__} still only provides you with 8-byte +@@ -5046,7 +5127,7 @@ in an @code{__attribute__} still only provides you with 8-byte alignment. See your linker documentation for further information. The @code{aligned} attribute can also be used for functions @@ -4133,7 +4437,7 @@ @item cleanup (@var{cleanup_function}) @cindex @code{cleanup} variable attribute -@@ -5100,7 +5542,8 @@ argument, which must be a string, is printed in the warning if +@@ -5100,7 +5181,8 @@ argument, which must be a string, is printed in the warning if present. 
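+
+A brief sketch (the variable name and message are hypothetical):
+
+@smallexample
+extern int old_rate __attribute__ ((deprecated ("use new_rate instead")));
+
+int
+f (void)
+@{
+  return old_rate;   /* Warns and prints the message.  */
+@}
+@end smallexample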
The @code{deprecated} attribute can also be used for functions and @@ -4143,10 +4447,28 @@ @item mode (@var{mode}) @cindex @code{mode} variable attribute -@@ -5186,33 +5629,6 @@ attribute is not available on all platforms. - If you need to map the entire contents of a module to a particular - section, consider using the facilities of the linker instead. +@@ -5168,50 +5250,23 @@ main() + @noindent + Use the @code{section} attribute with +-@emph{global} variables and not @emph{local} variables, +-as shown in the example. +- +-You may use the @code{section} attribute with initialized or +-uninitialized global variables but the linker requires +-each object be defined once, with the exception that uninitialized +-variables tentatively go in the @code{common} (or @code{bss}) section +-and can be multiply ``defined''. Using the @code{section} attribute +-changes what section the variable goes into and may cause the +-linker to issue an error if an uninitialized variable has multiple +-definitions. You can force a variable to be initialized with the +-@option{-fno-common} flag or the @code{nocommon} attribute. +- +-Some file formats do not support arbitrary sections so the @code{section} +-attribute is not available on all platforms. +-If you need to map the entire contents of a module to a particular +-section, consider using the facilities of the linker instead. +- -@item shared -@cindex @code{shared} variable attribute -On Microsoft Windows, in addition to putting variable definitions in a named @@ -4171,267 +4493,1447 @@ -You may only use the @code{shared} attribute along with @code{section} -attribute with a fully-initialized global definition because of the way -linkers work. See @code{section} attribute for more information. -- ++@emph{global} variables and not @emph{local} variables, ++as shown in the example. + -The @code{shared} attribute is only available on Microsoft Windows@. -- ++You may use the @code{section} attribute with initialized or ++uninitialized global variables but the linker requires ++each object be defined once, with the exception that uninitialized ++variables tentatively go in the @code{common} (or @code{bss}) section ++and can be multiply ``defined''. Using the @code{section} attribute ++changes what section the variable goes into and may cause the ++linker to issue an error if an uninitialized variable has multiple ++definitions. You can force a variable to be initialized with the ++@option{-fno-common} flag or the @code{nocommon} attribute. ++ ++Some file formats do not support arbitrary sections so the @code{section} ++attribute is not available on all platforms. ++If you need to map the entire contents of a module to a particular ++section, consider using the facilities of the linker instead. + @item tls_model ("@var{tls_model}") @cindex @code{tls_model} variable attribute - The @code{tls_model} attribute sets thread-local storage model -@@ -5270,42 +5686,14 @@ struct S __attribute__ ((vector_size (16))) foo; - is invalid even if the size of the structure is the same as the size of - the @code{int}. - --@item selectany --@cindex @code{selectany} variable attribute --The @code{selectany} attribute causes an initialized global variable to --have link-once semantics. When multiple definitions of the variable are --encountered by the linker, the first is selected and the remainder are --discarded. Following usage by the Microsoft compiler, the linker is told --@emph{not} to warn about size or content differences of the multiple --definitions. 
-- --Although the primary usage of this attribute is for POD types, the --attribute can also be applied to global C++ objects that are initialized --by a constructor. In this case, the static initialization and destruction --code for the object is emitted in each translation defining the object, --but the calls to the constructor and destructor are protected by a --link-once guard variable. -- --The @code{selectany} attribute is only available on Microsoft Windows --targets. You can use @code{__declspec (selectany)} as a synonym for --@code{__attribute__ ((selectany))} for compatibility with other --compilers. -- - @item weak - @cindex @code{weak} variable attribute +@@ -5258,17 +5313,280 @@ This attribute is only applicable to integral and float scalars, + although arrays, pointers, and function return values are allowed in + conjunction with this construct. + +-Aggregates with this attribute are invalid, even if they are of the same +-size as a corresponding scalar. For example, the declaration: ++Aggregates with this attribute are invalid, even if they are of the same ++size as a corresponding scalar. For example, the declaration: ++ ++@smallexample ++struct S @{ int a; @}; ++struct S __attribute__ ((vector_size (16))) foo; ++@end smallexample ++ ++@noindent ++is invalid even if the size of the structure is the same as the size of ++the @code{int}. ++ ++@item weak ++@cindex @code{weak} variable attribute ++The @code{weak} attribute is described in ++@ref{Common Function Attributes}. ++ ++@end table ++ ++@node AVR Variable Attributes ++@subsection AVR Variable Attributes ++ ++@table @code ++@item progmem ++@cindex @code{progmem} variable attribute, AVR ++The @code{progmem} attribute is used on the AVR to place read-only ++data in the non-volatile program memory (flash). The @code{progmem} ++attribute accomplishes this by putting respective variables into a ++section whose name starts with @code{.progmem}. ++ ++This attribute works similar to the @code{section} attribute ++but adds additional checking. Notice that just like the ++@code{section} attribute, @code{progmem} affects the location ++of the data but not how this data is accessed. ++ ++In order to read data located with the @code{progmem} attribute ++(inline) assembler must be used. ++@smallexample ++/* Use custom macros from @w{@uref{http://nongnu.org/avr-libc/user-manual/,AVR-LibC}} */ ++#include ++ ++/* Locate var in flash memory */ ++const int var[2] PROGMEM = @{ 1, 2 @}; ++ ++int read_var (int i) ++@{ ++ /* Access var[] by accessor macro from avr/pgmspace.h */ ++ return (int) pgm_read_word (& var[i]); ++@} ++@end smallexample ++ ++AVR is a Harvard architecture processor and data and read-only data ++normally resides in the data memory (RAM). ++ ++See also the @ref{AVR Named Address Spaces} section for ++an alternate way to locate and access data in flash memory. ++ ++@item io ++@itemx io (@var{addr}) ++@cindex @code{io} variable attribute, AVR ++Variables with the @code{io} attribute are used to address ++memory-mapped peripherals in the io address range. ++If an address is specified, the variable ++is assigned that address, and the value is interpreted as an ++address in the data address space. ++Example: ++ ++@smallexample ++volatile int porta __attribute__((io (0x22))); ++@end smallexample ++ ++The address specified in the address in the data address range. 
++ ++Otherwise, the variable it is not assigned an address, but the ++compiler will still use in/out instructions where applicable, ++assuming some other module assigns an address in the io address range. ++Example: ++ ++@smallexample ++extern volatile int porta __attribute__((io)); ++@end smallexample ++ ++@item io_low ++@itemx io_low (@var{addr}) ++@cindex @code{io_low} variable attribute, AVR ++This is like the @code{io} attribute, but additionally it informs the ++compiler that the object lies in the lower half of the I/O area, ++allowing the use of @code{cbi}, @code{sbi}, @code{sbic} and @code{sbis} ++instructions. ++ ++@item address ++@itemx address (@var{addr}) ++@cindex @code{address} variable attribute, AVR ++Variables with the @code{address} attribute are used to address ++memory-mapped peripherals that may lie outside the io address range. ++ ++@smallexample ++volatile int porta __attribute__((address (0x600))); ++@end smallexample ++ ++@end table ++ ++@node Blackfin Variable Attributes ++@subsection Blackfin Variable Attributes ++ ++Three attributes are currently defined for the Blackfin. ++ ++@table @code ++@item l1_data ++@itemx l1_data_A ++@itemx l1_data_B ++@cindex @code{l1_data} variable attribute, Blackfin ++@cindex @code{l1_data_A} variable attribute, Blackfin ++@cindex @code{l1_data_B} variable attribute, Blackfin ++Use these attributes on the Blackfin to place the variable into L1 Data SRAM. ++Variables with @code{l1_data} attribute are put into the specific section ++named @code{.l1.data}. Those with @code{l1_data_A} attribute are put into ++the specific section named @code{.l1.data.A}. Those with @code{l1_data_B} ++attribute are put into the specific section named @code{.l1.data.B}. ++ ++@item l2 ++@cindex @code{l2} variable attribute, Blackfin ++Use this attribute on the Blackfin to place the variable into L2 SRAM. ++Variables with @code{l2} attribute are put into the specific section ++named @code{.l2.data}. ++@end table ++ ++@node H8/300 Variable Attributes ++@subsection H8/300 Variable Attributes ++ ++These variable attributes are available for H8/300 targets: ++ ++@table @code ++@item eightbit_data ++@cindex @code{eightbit_data} variable attribute, H8/300 ++@cindex eight-bit data on the H8/300, H8/300H, and H8S ++Use this attribute on the H8/300, H8/300H, and H8S to indicate that the specified ++variable should be placed into the eight-bit data section. ++The compiler generates more efficient code for certain operations ++on data in the eight-bit data area. Note the eight-bit data area is limited to ++256 bytes of data. ++ ++You must use GAS and GLD from GNU binutils version 2.7 or later for ++this attribute to work correctly. ++ ++@item tiny_data ++@cindex @code{tiny_data} variable attribute, H8/300 ++@cindex tiny data section on the H8/300H and H8S ++Use this attribute on the H8/300H and H8S to indicate that the specified ++variable should be placed into the tiny data section. ++The compiler generates more efficient code for loads and stores ++on data in the tiny data section. Note the tiny data area is limited to ++slightly under 32KB of data. ++ ++@end table ++ ++@node IA-64 Variable Attributes ++@subsection IA-64 Variable Attributes ++ ++The IA-64 back end supports the following variable attribute: ++ ++@table @code ++@item model (@var{model-name}) ++@cindex @code{model} variable attribute, IA-64 ++ ++On IA-64, use this attribute to set the addressability of an object. 
++At present, the only supported identifier for @var{model-name} is ++@code{small}, indicating addressability via ``small'' (22-bit) ++addresses (so that their addresses can be loaded with the @code{addl} ++instruction). Caveat: such addressing is by definition not position ++independent and hence this attribute must not be used for objects ++defined by shared libraries. ++ ++@end table ++ ++@node M32R/D Variable Attributes ++@subsection M32R/D Variable Attributes ++ ++One attribute is currently defined for the M32R/D@. ++ ++@table @code ++@item model (@var{model-name}) ++@cindex @code{model-name} variable attribute, M32R/D ++@cindex variable addressability on the M32R/D ++Use this attribute on the M32R/D to set the addressability of an object. ++The identifier @var{model-name} is one of @code{small}, @code{medium}, ++or @code{large}, representing each of the code models. ++ ++Small model objects live in the lower 16MB of memory (so that their ++addresses can be loaded with the @code{ld24} instruction). ++ ++Medium and large model objects may live anywhere in the 32-bit address space ++(the compiler generates @code{seth/add3} instructions to load their ++addresses). ++@end table ++ ++@node MeP Variable Attributes ++@subsection MeP Variable Attributes ++ ++The MeP target has a number of addressing modes and busses. The ++@code{near} space spans the standard memory space's first 16 megabytes ++(24 bits). The @code{far} space spans the entire 32-bit memory space. ++The @code{based} space is a 128-byte region in the memory space that ++is addressed relative to the @code{$tp} register. The @code{tiny} ++space is a 65536-byte region relative to the @code{$gp} register. In ++addition to these memory regions, the MeP target has a separate 16-bit ++control bus which is specified with @code{cb} attributes. ++ ++@table @code ++ ++@item based ++@cindex @code{based} variable attribute, MeP ++Any variable with the @code{based} attribute is assigned to the ++@code{.based} section, and is accessed with relative to the ++@code{$tp} register. ++ ++@item tiny ++@cindex @code{tiny} variable attribute, MeP ++Likewise, the @code{tiny} attribute assigned variables to the ++@code{.tiny} section, relative to the @code{$gp} register. ++ ++@item near ++@cindex @code{near} variable attribute, MeP ++Variables with the @code{near} attribute are assumed to have addresses ++that fit in a 24-bit addressing mode. This is the default for large ++variables (@code{-mtiny=4} is the default) but this attribute can ++override @code{-mtiny=} for small variables, or override @code{-ml}. ++ ++@item far ++@cindex @code{far} variable attribute, MeP ++Variables with the @code{far} attribute are addressed using a full ++32-bit address. Since this covers the entire memory space, this ++allows modules to make no assumptions about where variables might be ++stored. ++ ++@item io ++@cindex @code{io} variable attribute, MeP ++@itemx io (@var{addr}) ++Variables with the @code{io} attribute are used to address ++memory-mapped peripherals. If an address is specified, the variable ++is assigned that address, else it is not assigned an address (it is ++assumed some other module assigns an address). Example: ++ ++@smallexample ++int timer_count __attribute__((io(0x123))); ++@end smallexample ++ ++@item cb ++@itemx cb (@var{addr}) ++@cindex @code{cb} variable attribute, MeP ++Variables with the @code{cb} attribute are used to access the control ++bus, using special instructions. @code{addr} indicates the control bus ++address. 
Example: + + @smallexample +-struct S @{ int a; @}; +-struct S __attribute__ ((vector_size (16))) foo; ++int cpu_clock __attribute__((cb(0x123))); + @end smallexample + +-@noindent +-is invalid even if the size of the structure is the same as the size of +-the @code{int}. ++@end table ++ ++@node Microsoft Windows Variable Attributes ++@subsection Microsoft Windows Variable Attributes ++ ++You can use these attributes on Microsoft Windows targets. ++@ref{x86 Variable Attributes} for additional Windows compatibility ++attributes available on all x86 targets. ++ ++@table @code ++@item dllimport ++@itemx dllexport ++@cindex @code{dllimport} variable attribute ++@cindex @code{dllexport} variable attribute ++The @code{dllimport} and @code{dllexport} attributes are described in ++@ref{Microsoft Windows Function Attributes}. + + @item selectany + @cindex @code{selectany} variable attribute +@@ -5291,891 +5609,1062 @@ targets. You can use @code{__declspec (selectany)} as a synonym for + @code{__attribute__ ((selectany))} for compatibility with other + compilers. + +-@item weak +-@cindex @code{weak} variable attribute -The @code{weak} attribute is described in @ref{Function Attributes}. -- ++@item shared ++@cindex @code{shared} variable attribute ++On Microsoft Windows, in addition to putting variable definitions in a named ++section, the section can also be shared among all running copies of an ++executable or DLL@. For example, this small program defines shared data ++by putting it in a named section @code{shared} and marking the section ++shareable: + -@item dllimport -@cindex @code{dllimport} variable attribute -The @code{dllimport} attribute is described in @ref{Function Attributes}. -- ++@smallexample ++int foo __attribute__((section ("shared"), shared)) = 0; + -@item dllexport -@cindex @code{dllexport} variable attribute -The @code{dllexport} attribute is described in @ref{Function Attributes}. -+The @code{weak} attribute is described in -+@ref{Common Function Attributes}. ++int ++main() ++@{ ++ /* @r{Read and write foo. All running ++ copies see the same value.} */ ++ return 0; ++@} ++@end smallexample ++ ++@noindent ++You may only use the @code{shared} attribute along with @code{section} ++attribute with a fully-initialized global definition because of the way ++linkers work. See @code{section} attribute for more information. ++ ++The @code{shared} attribute is only available on Microsoft Windows@. + + @end table + +-@anchor{AVR Variable Attributes} +-@subsection AVR Variable Attributes ++@node PowerPC Variable Attributes ++@subsection PowerPC Variable Attributes + +-@table @code +-@item progmem +-@cindex @code{progmem} variable attribute, AVR +-The @code{progmem} attribute is used on the AVR to place read-only +-data in the non-volatile program memory (flash). The @code{progmem} +-attribute accomplishes this by putting respective variables into a +-section whose name starts with @code{.progmem}. ++Three attributes currently are defined for PowerPC configurations: ++@code{altivec}, @code{ms_struct} and @code{gcc_struct}. + +-This attribute works similar to the @code{section} attribute +-but adds additional checking. Notice that just like the +-@code{section} attribute, @code{progmem} affects the location +-of the data but not how this data is accessed. ++@cindex @code{ms_struct} variable attribute, PowerPC ++@cindex @code{gcc_struct} variable attribute, PowerPC ++For full documentation of the struct attributes please see the ++documentation in @ref{x86 Variable Attributes}. 
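As a rough sketch (the tag name @code{msvc_layout} is illustrative, and the
exact layout rules are the ones documented for the x86 attributes), a
structure can be asked to follow the Microsoft layout rules by attaching
@code{ms_struct} to its definition, while @code{gcc_struct} selects the
default GCC layout again:

@smallexample
struct __attribute__ ((ms_struct)) msvc_layout
@{
  char c;
  int i : 4; /* bit-field packing is where the two layouts differ */
@};
@end smallexample
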
+ +-In order to read data located with the @code{progmem} attribute +-(inline) assembler must be used. +-@smallexample +-/* Use custom macros from @w{@uref{http://nongnu.org/avr-libc/user-manual/,AVR-LibC}} */ +-#include ++@cindex @code{altivec} variable attribute, PowerPC ++For documentation of @code{altivec} attribute please see the ++documentation in @ref{PowerPC Type Attributes}. + +-/* Locate var in flash memory */ +-const int var[2] PROGMEM = @{ 1, 2 @}; ++@node SPU Variable Attributes ++@subsection SPU Variable Attributes + +-int read_var (int i) +-@{ +- /* Access var[] by accessor macro from avr/pgmspace.h */ +- return (int) pgm_read_word (& var[i]); +-@} +-@end smallexample ++@cindex @code{spu_vector} variable attribute, SPU ++The SPU supports the @code{spu_vector} attribute for variables. For ++documentation of this attribute please see the documentation in ++@ref{SPU Type Attributes}. + +-AVR is a Harvard architecture processor and data and read-only data +-normally resides in the data memory (RAM). ++@node x86 Variable Attributes ++@subsection x86 Variable Attributes + +-See also the @ref{AVR Named Address Spaces} section for +-an alternate way to locate and access data in flash memory. ++Two attributes are currently defined for x86 configurations: ++@code{ms_struct} and @code{gcc_struct}. + +-@item io +-@itemx io (@var{addr}) +-@cindex @code{io} variable attribute, AVR +-Variables with the @code{io} attribute are used to address +-memory-mapped peripherals in the io address range. +-If an address is specified, the variable +-is assigned that address, and the value is interpreted as an +-address in the data address space. +-Example: ++@table @code ++@item ms_struct ++@itemx gcc_struct ++@cindex @code{ms_struct} variable attribute, x86 ++@cindex @code{gcc_struct} variable attribute, x86 ++ ++If @code{packed} is used on a structure, or if bit-fields are used, ++it may be that the Microsoft ABI lays out the structure differently ++than the way GCC normally does. Particularly when moving packed ++data between functions compiled with GCC and the native Microsoft compiler ++(either via function call or as data in a file), it may be necessary to access ++either format. ++ ++Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 ++compilers to match the native Microsoft compiler. ++ ++The Microsoft structure layout algorithm is fairly simple with the exception ++of the bit-field packing. ++The padding and alignment of members of structures and whether a bit-field ++can straddle a storage-unit boundary are determine by these rules: ++ ++@enumerate ++@item Structure members are stored sequentially in the order in which they are ++declared: the first member has the lowest memory address and the last member ++the highest. ++ ++@item Every data object has an alignment requirement. The alignment requirement ++for all data except structures, unions, and arrays is either the size of the ++object or the current packing size (specified with either the ++@code{aligned} attribute or the @code{pack} pragma), ++whichever is less. For structures, unions, and arrays, ++the alignment requirement is the largest alignment requirement of its members. ++Every object is allocated an offset so that: + + @smallexample +-volatile int porta __attribute__((io (0x22))); ++offset % alignment_requirement == 0 + @end smallexample + +-The address specified in the address in the data address range. 
++@item Adjacent bit-fields are packed into the same 1-, 2-, or 4-byte allocation ++unit if the integral types are the same size and if the next bit-field fits ++into the current allocation unit without crossing the boundary imposed by the ++common alignment requirements of the bit-fields. ++@end enumerate + +-Otherwise, the variable it is not assigned an address, but the +-compiler will still use in/out instructions where applicable, +-assuming some other module assigns an address in the io address range. +-Example: ++MSVC interprets zero-length bit-fields in the following ways: ++ ++@enumerate ++@item If a zero-length bit-field is inserted between two bit-fields that ++are normally coalesced, the bit-fields are not coalesced. ++ ++For example: + + @smallexample +-extern volatile int porta __attribute__((io)); ++struct ++ @{ ++ unsigned long bf_1 : 12; ++ unsigned long : 0; ++ unsigned long bf_2 : 12; ++ @} t1; + @end smallexample + +-@item io_low +-@itemx io_low (@var{addr}) +-@cindex @code{io_low} variable attribute, AVR +-This is like the @code{io} attribute, but additionally it informs the +-compiler that the object lies in the lower half of the I/O area, +-allowing the use of @code{cbi}, @code{sbi}, @code{sbic} and @code{sbis} +-instructions. ++@noindent ++The size of @code{t1} is 8 bytes with the zero-length bit-field. If the ++zero-length bit-field were removed, @code{t1}'s size would be 4 bytes. + +-@item address +-@itemx address (@var{addr}) +-@cindex @code{address} variable attribute, AVR +-Variables with the @code{address} attribute are used to address +-memory-mapped peripherals that may lie outside the io address range. ++@item If a zero-length bit-field is inserted after a bit-field, @code{foo}, and the ++alignment of the zero-length bit-field is greater than the member that follows it, ++@code{bar}, @code{bar} is aligned as the type of the zero-length bit-field. ++ ++For example: + + @smallexample +-volatile int porta __attribute__((address (0x600))); ++struct ++ @{ ++ char foo : 4; ++ short : 0; ++ char bar; ++ @} t2; ++ ++struct ++ @{ ++ char foo : 4; ++ short : 0; ++ double bar; ++ @} t3; + @end smallexample + +-@end table ++@noindent ++For @code{t2}, @code{bar} is placed at offset 2, rather than offset 1. ++Accordingly, the size of @code{t2} is 4. For @code{t3}, the zero-length ++bit-field does not affect the alignment of @code{bar} or, as a result, the size ++of the structure. + +-@subsection Blackfin Variable Attributes ++Taking this into account, it is important to note the following: + +-Three attributes are currently defined for the Blackfin. ++@enumerate ++@item If a zero-length bit-field follows a normal bit-field, the type of the ++zero-length bit-field may affect the alignment of the structure as whole. For ++example, @code{t2} has a size of 4 bytes, since the zero-length bit-field follows a ++normal bit-field, and is of type short. + +-@table @code +-@item l1_data +-@itemx l1_data_A +-@itemx l1_data_B +-@cindex @code{l1_data} variable attribute, Blackfin +-@cindex @code{l1_data_A} variable attribute, Blackfin +-@cindex @code{l1_data_B} variable attribute, Blackfin +-Use these attributes on the Blackfin to place the variable into L1 Data SRAM. +-Variables with @code{l1_data} attribute are put into the specific section +-named @code{.l1.data}. Those with @code{l1_data_A} attribute are put into +-the specific section named @code{.l1.data.A}. Those with @code{l1_data_B} +-attribute are put into the specific section named @code{.l1.data.B}. 
++@item Even if a zero-length bit-field is not followed by a normal bit-field, it may ++still affect the alignment of the structure: + +-@item l2 +-@cindex @code{l2} variable attribute, Blackfin +-Use this attribute on the Blackfin to place the variable into L2 SRAM. +-Variables with @code{l2} attribute are put into the specific section +-named @code{.l2.data}. ++@smallexample ++struct ++ @{ ++ char foo : 6; ++ long : 0; ++ @} t4; ++@end smallexample ++ ++@noindent ++Here, @code{t4} takes up 4 bytes. ++@end enumerate ++ ++@item Zero-length bit-fields following non-bit-field members are ignored: ++ ++@smallexample ++struct ++ @{ ++ char foo; ++ long : 0; ++ char bar; ++ @} t5; ++@end smallexample ++ ++@noindent ++Here, @code{t5} takes up 2 bytes. ++@end enumerate + @end table + +-@subsection H8/300 Variable Attributes ++@node Xstormy16 Variable Attributes ++@subsection Xstormy16 Variable Attributes + +-These variable attributes are available for H8/300 targets: ++One attribute is currently defined for xstormy16 configurations: ++@code{below100}. + + @table @code +-@item eightbit_data +-@cindex @code{eightbit_data} variable attribute, H8/300 +-@cindex eight-bit data on the H8/300, H8/300H, and H8S +-Use this attribute on the H8/300, H8/300H, and H8S to indicate that the specified +-variable should be placed into the eight-bit data section. +-The compiler generates more efficient code for certain operations +-on data in the eight-bit data area. Note the eight-bit data area is limited to +-256 bytes of data. +- +-You must use GAS and GLD from GNU binutils version 2.7 or later for +-this attribute to work correctly. ++@item below100 ++@cindex @code{below100} variable attribute, Xstormy16 + +-@item tiny_data +-@cindex @code{tiny_data} variable attribute, H8/300 +-@cindex tiny data section on the H8/300H and H8S +-Use this attribute on the H8/300H and H8S to indicate that the specified +-variable should be placed into the tiny data section. +-The compiler generates more efficient code for loads and stores +-on data in the tiny data section. Note the tiny data area is limited to +-slightly under 32KB of data. ++If a variable has the @code{below100} attribute (@code{BELOW100} is ++allowed also), GCC places the variable in the first 0x100 bytes of ++memory and use special opcodes to access it. Such variables are ++placed in either the @code{.bss_below100} section or the ++@code{.data_below100} section. + + @end table + +-@subsection IA-64 Variable Attributes ++@node Type Attributes ++@section Specifying Attributes of Types ++@cindex attribute of types ++@cindex type attributes + +-The IA-64 back end supports the following variable attribute: ++The keyword @code{__attribute__} allows you to specify special ++attributes of types. Some type attributes apply only to @code{struct} ++and @code{union} types, while others can apply to any type defined ++via a @code{typedef} declaration. Other attributes are defined for ++functions (@pxref{Function Attributes}), labels (@pxref{Label ++Attributes}) and for variables (@pxref{Variable Attributes}). + +-@table @code +-@item model (@var{model-name}) +-@cindex @code{model} variable attribute, IA-64 ++The @code{__attribute__} keyword is followed by an attribute specification ++inside double parentheses. + +-On IA-64, use this attribute to set the addressability of an object. 
+-At present, the only supported identifier for @var{model-name} is +-@code{small}, indicating addressability via ``small'' (22-bit) +-addresses (so that their addresses can be loaded with the @code{addl} +-instruction). Caveat: such addressing is by definition not position +-independent and hence this attribute must not be used for objects +-defined by shared libraries. ++You may specify type attributes in an enum, struct or union type ++declaration or definition by placing them immediately after the ++@code{struct}, @code{union} or @code{enum} keyword. A less preferred ++syntax is to place them just past the closing curly brace of the ++definition. + +-@end table ++You can also include type attributes in a @code{typedef} declaration. ++@xref{Attribute Syntax}, for details of the exact syntax for using ++attributes. + +-@subsection M32R/D Variable Attributes ++@menu ++* Common Type Attributes:: ++* ARM Type Attributes:: ++* MeP Type Attributes:: ++* PowerPC Type Attributes:: ++* SPU Type Attributes:: ++* x86 Type Attributes:: ++@end menu + +-One attribute is currently defined for the M32R/D@. ++@node Common Type Attributes ++@subsection Common Type Attributes ++ ++The following type attributes are supported on most targets. + + @table @code +-@item model (@var{model-name}) +-@cindex @code{model-name} variable attribute, M32R/D +-@cindex variable addressability on the M32R/D +-Use this attribute on the M32R/D to set the addressability of an object. +-The identifier @var{model-name} is one of @code{small}, @code{medium}, +-or @code{large}, representing each of the code models. ++@cindex @code{aligned} type attribute ++@item aligned (@var{alignment}) ++This attribute specifies a minimum alignment (in bytes) for variables ++of the specified type. For example, the declarations: + +-Small model objects live in the lower 16MB of memory (so that their +-addresses can be loaded with the @code{ld24} instruction). ++@smallexample ++struct S @{ short f[3]; @} __attribute__ ((aligned (8))); ++typedef int more_aligned_int __attribute__ ((aligned (8))); ++@end smallexample + +-Medium and large model objects may live anywhere in the 32-bit address space +-(the compiler generates @code{seth/add3} instructions to load their +-addresses). +-@end table ++@noindent ++force the compiler to ensure (as far as it can) that each variable whose ++type is @code{struct S} or @code{more_aligned_int} is allocated and ++aligned @emph{at least} on a 8-byte boundary. On a SPARC, having all ++variables of type @code{struct S} aligned to 8-byte boundaries allows ++the compiler to use the @code{ldd} and @code{std} (doubleword load and ++store) instructions when copying one variable of type @code{struct S} to ++another, thus improving run-time efficiency. + +-@anchor{MeP Variable Attributes} +-@subsection MeP Variable Attributes ++Note that the alignment of any given @code{struct} or @code{union} type ++is required by the ISO C standard to be at least a perfect multiple of ++the lowest common multiple of the alignments of all of the members of ++the @code{struct} or @code{union} in question. This means that you @emph{can} ++effectively adjust the alignment of a @code{struct} or @code{union} ++type by attaching an @code{aligned} attribute to any one of the members ++of such a type, but the notation illustrated in the example above is a ++more obvious, intuitive, and readable way to request the compiler to ++adjust the alignment of an entire @code{struct} or @code{union} type. 
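For instance, under the rule just described, both of the following
declarations (the tag names are only illustrative) give the whole type at
least 8-byte alignment; the first spelling simply states that intent more
directly:

@smallexample
struct whole_aligned @{ short f[3]; @} __attribute__ ((aligned (8)));
struct member_aligned @{ short f[3] __attribute__ ((aligned (8))); @};
@end smallexample
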
+ +-The MeP target has a number of addressing modes and busses. The +-@code{near} space spans the standard memory space's first 16 megabytes +-(24 bits). The @code{far} space spans the entire 32-bit memory space. +-The @code{based} space is a 128-byte region in the memory space that +-is addressed relative to the @code{$tp} register. The @code{tiny} +-space is a 65536-byte region relative to the @code{$gp} register. In +-addition to these memory regions, the MeP target has a separate 16-bit +-control bus which is specified with @code{cb} attributes. ++As in the preceding example, you can explicitly specify the alignment ++(in bytes) that you wish the compiler to use for a given @code{struct} ++or @code{union} type. Alternatively, you can leave out the alignment factor ++and just ask the compiler to align a type to the maximum ++useful alignment for the target machine you are compiling for. For ++example, you could write: + +-@table @code ++@smallexample ++struct S @{ short f[3]; @} __attribute__ ((aligned)); ++@end smallexample + +-@item based +-@cindex @code{based} variable attribute, MeP +-Any variable with the @code{based} attribute is assigned to the +-@code{.based} section, and is accessed with relative to the +-@code{$tp} register. ++Whenever you leave out the alignment factor in an @code{aligned} ++attribute specification, the compiler automatically sets the alignment ++for the type to the largest alignment that is ever used for any data ++type on the target machine you are compiling for. Doing this can often ++make copy operations more efficient, because the compiler can use ++whatever instructions copy the biggest chunks of memory when performing ++copies to or from the variables that have types that you have aligned ++this way. + +-@item tiny +-@cindex @code{tiny} variable attribute, MeP +-Likewise, the @code{tiny} attribute assigned variables to the +-@code{.tiny} section, relative to the @code{$gp} register. ++In the example above, if the size of each @code{short} is 2 bytes, then ++the size of the entire @code{struct S} type is 6 bytes. The smallest ++power of two that is greater than or equal to that is 8, so the ++compiler sets the alignment for the entire @code{struct S} type to 8 ++bytes. + +-@item near +-@cindex @code{near} variable attribute, MeP +-Variables with the @code{near} attribute are assumed to have addresses +-that fit in a 24-bit addressing mode. This is the default for large +-variables (@code{-mtiny=4} is the default) but this attribute can +-override @code{-mtiny=} for small variables, or override @code{-ml}. ++Note that although you can ask the compiler to select a time-efficient ++alignment for a given type and then declare only individual stand-alone ++objects of that type, the compiler's ability to select a time-efficient ++alignment is primarily useful only when you plan to create arrays of ++variables having the relevant (efficiently aligned) type. If you ++declare or use arrays of variables of an efficiently-aligned type, then ++it is likely that your program also does pointer arithmetic (or ++subscripting, which amounts to the same thing) on pointers to the ++relevant type, and the code that the compiler generates for these ++pointer arithmetic operations is often more efficient for ++efficiently-aligned types than for other types. + +-@item far +-@cindex @code{far} variable attribute, MeP +-Variables with the @code{far} attribute are addressed using a full +-32-bit address. 
Since this covers the entire memory space, this +-allows modules to make no assumptions about where variables might be +-stored. ++The @code{aligned} attribute can only increase the alignment; but you ++can decrease it by specifying @code{packed} as well. See below. + +-@item io +-@cindex @code{io} variable attribute, MeP +-@itemx io (@var{addr}) +-Variables with the @code{io} attribute are used to address +-memory-mapped peripherals. If an address is specified, the variable +-is assigned that address, else it is not assigned an address (it is +-assumed some other module assigns an address). Example: ++Note that the effectiveness of @code{aligned} attributes may be limited ++by inherent limitations in your linker. On many systems, the linker is ++only able to arrange for variables to be aligned up to a certain maximum ++alignment. (For some linkers, the maximum supported alignment may ++be very very small.) If your linker is only able to align variables ++up to a maximum of 8-byte alignment, then specifying @code{aligned(16)} ++in an @code{__attribute__} still only provides you with 8-byte ++alignment. See your linker documentation for further information. ++ ++@opindex fshort-enums ++Specifying this attribute for @code{struct} and @code{union} types is ++equivalent to specifying the @code{packed} attribute on each of the ++structure or union members. Specifying the @option{-fshort-enums} ++flag on the line is equivalent to specifying the @code{packed} ++attribute on all @code{enum} definitions. ++ ++In the following example @code{struct my_packed_struct}'s members are ++packed closely together, but the internal layout of its @code{s} member ++is not packed---to do that, @code{struct my_unpacked_struct} needs to ++be packed too. + + @smallexample +-int timer_count __attribute__((io(0x123))); ++struct my_unpacked_struct ++ @{ ++ char c; ++ int i; ++ @}; ++ ++struct __attribute__ ((__packed__)) my_packed_struct ++ @{ ++ char c; ++ int i; ++ struct my_unpacked_struct s; ++ @}; + @end smallexample + +-@item cb +-@itemx cb (@var{addr}) +-@cindex @code{cb} variable attribute, MeP +-Variables with the @code{cb} attribute are used to access the control +-bus, using special instructions. @code{addr} indicates the control bus +-address. Example: ++You may only specify this attribute on the definition of an @code{enum}, ++@code{struct} or @code{union}, not on a @code{typedef} that does not ++also define the enumerated type, structure or union. ++ ++@item bnd_variable_size ++@cindex @code{bnd_variable_size} type attribute ++@cindex Pointer Bounds Checker attributes ++When applied to a structure field, this attribute tells Pointer ++Bounds Checker that the size of this field should not be computed ++using static type information. It may be used to mark variably-sized ++static array fields placed at the end of a structure. ++ ++@smallexample ++struct S ++@{ ++ int size; ++ char data[1]; ++@} ++S *p = (S *)malloc (sizeof(S) + 100); ++p->data[10] = 0; //Bounds violation ++@end smallexample ++ ++@noindent ++By using an attribute for the field we may avoid unwanted bound ++violation checks: ++ ++@smallexample ++struct S ++@{ ++ int size; ++ char data[1] __attribute__((bnd_variable_size)); ++@} ++S *p = (S *)malloc (sizeof(S) + 100); ++p->data[10] = 0; //OK ++@end smallexample ++ ++@item deprecated ++@itemx deprecated (@var{msg}) ++@cindex @code{deprecated} type attribute ++The @code{deprecated} attribute results in a warning if the type ++is used anywhere in the source file. 
This is useful when identifying ++types that are expected to be removed in a future version of a program. ++If possible, the warning also includes the location of the declaration ++of the deprecated type, to enable users to easily find further ++information about why the type is deprecated, or what they should do ++instead. Note that the warnings only occur for uses and then only ++if the type is being applied to an identifier that itself is not being ++declared as deprecated. - @end table + @smallexample +-int cpu_clock __attribute__((cb(0x123))); ++typedef int T1 __attribute__ ((deprecated)); ++T1 x; ++typedef T1 T2; ++T2 y; ++typedef T1 T3 __attribute__ ((deprecated)); ++T3 z __attribute__ ((deprecated)); + @end smallexample --@anchor{AVR Variable Attributes} -+@node AVR Variable Attributes - @subsection AVR Variable Attributes +-@end table +- +-@subsection PowerPC Variable Attributes ++@noindent ++results in a warning on line 2 and 3 but not lines 4, 5, or 6. No ++warning is issued for line 4 because T2 is not explicitly ++deprecated. Line 5 has no warning because T3 is explicitly ++deprecated. Similarly for line 6. The optional @var{msg} ++argument, which must be a string, is printed in the warning if ++present. + +-Three attributes currently are defined for PowerPC configurations: +-@code{altivec}, @code{ms_struct} and @code{gcc_struct}. ++The @code{deprecated} attribute can also be used for functions and ++variables (@pxref{Function Attributes}, @pxref{Variable Attributes}.) + +-@cindex @code{ms_struct} variable attribute, PowerPC +-@cindex @code{gcc_struct} variable attribute, PowerPC +-For full documentation of the struct attributes please see the +-documentation in @ref{x86 Variable Attributes}. ++@item designated_init ++@cindex @code{designated_init} type attribute ++This attribute may only be applied to structure types. It indicates ++that any initialization of an object of this type must use designated ++initializers rather than positional initializers. The intent of this ++attribute is to allow the programmer to indicate that a structure's ++layout may change, and that therefore relying on positional ++initialization will result in future breakage. - @table @code -@@ -5388,6 +5776,7 @@ volatile int porta __attribute__((address (0x600))); +-@cindex @code{altivec} variable attribute, PowerPC +-For documentation of @code{altivec} attribute please see the +-documentation in @ref{PowerPC Type Attributes}. ++GCC emits warnings based on this attribute by default; use ++@option{-Wno-designated-init} to suppress them. - @end table +-@subsection SPU Variable Attributes ++@item may_alias ++@cindex @code{may_alias} type attribute ++Accesses through pointers to types with this attribute are not subject ++to type-based alias analysis, but are instead assumed to be able to alias ++any other type of objects. ++In the context of section 6.5 paragraph 7 of the C99 standard, ++an lvalue expression ++dereferencing such a pointer is treated like having a character type. ++See @option{-fstrict-aliasing} for more information on aliasing issues. ++This extension exists to support some vector APIs, in which pointers to ++one vector type are permitted to alias pointers to a different vector type. + +-@cindex @code{spu_vector} variable attribute, SPU +-The SPU supports the @code{spu_vector} attribute for variables. For +-documentation of this attribute please see the documentation in +-@ref{SPU Type Attributes}. ++Note that an object of a type with this attribute does not have any ++special semantics. 
-+@node Blackfin Variable Attributes - @subsection Blackfin Variable Attributes +-@anchor{x86 Variable Attributes} +-@subsection x86 Variable Attributes ++Example of use: - Three attributes are currently defined for the Blackfin. -@@ -5412,6 +5801,7 @@ Variables with @code{l2} attribute are put into the specific section - named @code{.l2.data}. - @end table +-Two attributes are currently defined for x86 configurations: +-@code{ms_struct} and @code{gcc_struct}. ++@smallexample ++typedef short __attribute__((__may_alias__)) short_a; -+@node H8/300 Variable Attributes - @subsection H8/300 Variable Attributes +-@table @code +-@item ms_struct +-@itemx gcc_struct +-@cindex @code{ms_struct} variable attribute, x86 +-@cindex @code{gcc_struct} variable attribute, x86 ++int ++main (void) ++@{ ++ int a = 0x12345678; ++ short_a *b = (short_a *) &a; - These variable attributes are available for H8/300 targets: -@@ -5440,6 +5830,7 @@ slightly under 32KB of data. +-If @code{packed} is used on a structure, or if bit-fields are used, +-it may be that the Microsoft ABI lays out the structure differently +-than the way GCC normally does. Particularly when moving packed +-data between functions compiled with GCC and the native Microsoft compiler +-(either via function call or as data in a file), it may be necessary to access +-either format. ++ b[1] = 0; + +-Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 +-compilers to match the native Microsoft compiler. ++ if (a == 0x12345678) ++ abort(); + +-The Microsoft structure layout algorithm is fairly simple with the exception +-of the bit-field packing. +-The padding and alignment of members of structures and whether a bit-field +-can straddle a storage-unit boundary are determine by these rules: ++ exit(0); ++@} ++@end smallexample - @end table +-@enumerate +-@item Structure members are stored sequentially in the order in which they are +-declared: the first member has the lowest memory address and the last member +-the highest. ++@noindent ++If you replaced @code{short_a} with @code{short} in the variable ++declaration, the above program would abort when compiled with ++@option{-fstrict-aliasing}, which is on by default at @option{-O2} or ++above. + +-@item Every data object has an alignment requirement. The alignment requirement +-for all data except structures, unions, and arrays is either the size of the +-object or the current packing size (specified with either the +-@code{aligned} attribute or the @code{pack} pragma), +-whichever is less. For structures, unions, and arrays, +-the alignment requirement is the largest alignment requirement of its members. +-Every object is allocated an offset so that: ++@item packed ++@cindex @code{packed} type attribute ++This attribute, attached to @code{struct} or @code{union} type ++definition, specifies that each member (other than zero-width bit-fields) ++of the structure or union is placed to minimize the memory required. When ++attached to an @code{enum} definition, it indicates that the smallest ++integral type should be used. -+@node IA-64 Variable Attributes - @subsection IA-64 Variable Attributes +-@smallexample +-offset % alignment_requirement == 0 +-@end smallexample ++@item transparent_union ++@cindex @code{transparent_union} type attribute - The IA-64 back end supports the following variable attribute: -@@ -5458,6 +5849,7 @@ defined by shared libraries. 
+-@item Adjacent bit-fields are packed into the same 1-, 2-, or 4-byte allocation +-unit if the integral types are the same size and if the next bit-field fits +-into the current allocation unit without crossing the boundary imposed by the +-common alignment requirements of the bit-fields. +-@end enumerate ++This attribute, attached to a @code{union} type definition, indicates ++that any function parameter having that union type causes calls to that ++function to be treated in a special way. - @end table +-MSVC interprets zero-length bit-fields in the following ways: ++First, the argument corresponding to a transparent union type can be of ++any type in the union; no cast is required. Also, if the union contains ++a pointer type, the corresponding argument can be a null pointer ++constant or a void pointer expression; and if the union contains a void ++pointer type, the corresponding argument can be any pointer expression. ++If the union member type is a pointer, qualifiers like @code{const} on ++the referenced type must be respected, just as with normal pointer ++conversions. -+@node M32R/D Variable Attributes - @subsection M32R/D Variable Attributes +-@enumerate +-@item If a zero-length bit-field is inserted between two bit-fields that +-are normally coalesced, the bit-fields are not coalesced. ++Second, the argument is passed to the function using the calling ++conventions of the first member of the transparent union, not the calling ++conventions of the union itself. All members of the union must have the ++same machine representation; this is necessary for this argument passing ++to work properly. - One attribute is currently defined for the M32R/D@. -@@ -5478,7 +5870,7 @@ Medium and large model objects may live anywhere in the 32-bit address space - addresses). - @end table +-For example: ++Transparent unions are designed for library functions that have multiple ++interfaces for compatibility reasons. For example, suppose the ++@code{wait} function must accept either a value of type @code{int *} to ++comply with POSIX, or a value of type @code{union wait *} to comply with ++the 4.1BSD interface. If @code{wait}'s parameter were @code{void *}, ++@code{wait} would accept both kinds of arguments, but it would also ++accept any other pointer type and this would make argument type checking ++less useful. Instead, @code{} might define the interface ++as follows: --@anchor{MeP Variable Attributes} -+@node MeP Variable Attributes - @subsection MeP Variable Attributes + @smallexample +-struct +- @{ +- unsigned long bf_1 : 12; +- unsigned long : 0; +- unsigned long bf_2 : 12; +- @} t1; ++typedef union __attribute__ ((__transparent_union__)) ++ @{ ++ int *__ip; ++ union wait *__up; ++ @} wait_status_ptr_t; ++ ++pid_t wait (wait_status_ptr_t); + @end smallexample - The MeP target has a number of addressing modes and busses. The -@@ -5536,12 +5928,78 @@ Variables with the @code{cb} attribute are used to access the control - bus, using special instructions. @code{addr} indicates the control bus - address. Example: + @noindent +-The size of @code{t1} is 8 bytes with the zero-length bit-field. If the +-zero-length bit-field were removed, @code{t1}'s size would be 4 bytes. ++This interface allows either @code{int *} or @code{union wait *} ++arguments to be passed, using the @code{int *} calling convention. 
++The program can call @code{wait} with arguments of either type: --@smallexample --int cpu_clock __attribute__((cb(0x123))); --@end smallexample +-@item If a zero-length bit-field is inserted after a bit-field, @code{foo}, and the +-alignment of the zero-length bit-field is greater than the member that follows it, +-@code{bar}, @code{bar} is aligned as the type of the zero-length bit-field. +@smallexample -+int cpu_clock __attribute__((cb(0x123))); ++int w1 () @{ int w; return wait (&w); @} ++int w2 () @{ union wait w; return wait (&w); @} +@end smallexample -+ + +-For example: ++@noindent ++With this interface, @code{wait}'s implementation might look like this: + + @smallexample +-struct +- @{ +- char foo : 4; +- short : 0; +- char bar; +- @} t2; +- +-struct +- @{ +- char foo : 4; +- short : 0; +- double bar; +- @} t3; ++pid_t wait (wait_status_ptr_t p) ++@{ ++ return waitpid (-1, p.__ip, 0); ++@} + @end smallexample + +-@noindent +-For @code{t2}, @code{bar} is placed at offset 2, rather than offset 1. +-Accordingly, the size of @code{t2} is 4. For @code{t3}, the zero-length +-bit-field does not affect the alignment of @code{bar} or, as a result, the size +-of the structure. ++@item unused ++@cindex @code{unused} type attribute ++When attached to a type (including a @code{union} or a @code{struct}), ++this attribute means that variables of that type are meant to appear ++possibly unused. GCC does not produce a warning for any variables of ++that type, even if the variable appears to do nothing. This is often ++the case with lock or thread classes, which are usually defined and then ++not referenced, but contain constructors and destructors that have ++nontrivial bookkeeping functions. + +-Taking this into account, it is important to note the following: ++@item visibility ++@cindex @code{visibility} type attribute ++In C++, attribute visibility (@pxref{Function Attributes}) can also be ++applied to class, struct, union and enum types. Unlike other type ++attributes, the attribute must appear between the initial keyword and ++the name of the type; it cannot appear after the body of the type. + +-@enumerate +-@item If a zero-length bit-field follows a normal bit-field, the type of the +-zero-length bit-field may affect the alignment of the structure as whole. For +-example, @code{t2} has a size of 4 bytes, since the zero-length bit-field follows a +-normal bit-field, and is of type short. ++Note that the type visibility is applied to vague linkage entities ++associated with the class (vtable, typeinfo node, etc.). In ++particular, if a class is thrown as an exception in one shared object ++and caught in another, the class must have default visibility. ++Otherwise the two shared objects are unable to use the same ++typeinfo node and exception handling will break. + +-@item Even if a zero-length bit-field is not followed by a normal bit-field, it may +-still affect the alignment of the structure: +@end table + -+@node Microsoft Windows Variable Attributes -+@subsection Microsoft Windows Variable Attributes -+ -+You can use these attributes on Microsoft Windows targets. -+@ref{x86 Variable Attributes} for additional Windows compatibility -+attributes available on all x86 targets. -+ -+@table @code -+@item dllimport -+@itemx dllexport -+@cindex @code{dllimport} variable attribute -+@cindex @code{dllexport} variable attribute -+The @code{dllimport} and @code{dllexport} attributes are described in -+@ref{Microsoft Windows Function Attributes}. 
++To specify multiple attributes, separate them by commas within the ++double parentheses: for example, @samp{__attribute__ ((aligned (16), ++packed))}. + -+@item selectany -+@cindex @code{selectany} variable attribute -+The @code{selectany} attribute causes an initialized global variable to -+have link-once semantics. When multiple definitions of the variable are -+encountered by the linker, the first is selected and the remainder are -+discarded. Following usage by the Microsoft compiler, the linker is told -+@emph{not} to warn about size or content differences of the multiple -+definitions. -+ -+Although the primary usage of this attribute is for POD types, the -+attribute can also be applied to global C++ objects that are initialized -+by a constructor. In this case, the static initialization and destruction -+code for the object is emitted in each translation defining the object, -+but the calls to the constructor and destructor are protected by a -+link-once guard variable. -+ -+The @code{selectany} attribute is only available on Microsoft Windows -+targets. You can use @code{__declspec (selectany)} as a synonym for -+@code{__attribute__ ((selectany))} for compatibility with other -+compilers. ++@node ARM Type Attributes ++@subsection ARM Type Attributes + -+@item shared -+@cindex @code{shared} variable attribute -+On Microsoft Windows, in addition to putting variable definitions in a named -+section, the section can also be shared among all running copies of an -+executable or DLL@. For example, this small program defines shared data -+by putting it in a named section @code{shared} and marking the section -+shareable: ++@cindex @code{notshared} type attribute, ARM ++On those ARM targets that support @code{dllimport} (such as Symbian ++OS), you can use the @code{notshared} attribute to indicate that the ++virtual table and other similar data for a class should not be ++exported from a DLL@. For example: + +@smallexample -+int foo __attribute__((section ("shared"), shared)) = 0; -+ -+int -+main() -+@{ -+ /* @r{Read and write foo. All running -+ copies see the same value.} */ -+ return 0; ++class __declspec(notshared) C @{ ++public: ++ __declspec(dllimport) C(); ++ virtual void f(); +@} -+@end smallexample -+ -+@noindent -+You may only use the @code{shared} attribute along with @code{section} -+attribute with a fully-initialized global definition because of the way -+linkers work. See @code{section} attribute for more information. -+ -+The @code{shared} attribute is only available on Microsoft Windows@. - @end table +-@smallexample +-struct +- @{ +- char foo : 6; +- long : 0; +- @} t4; ++__declspec(dllexport) ++C::C() @{@} + @end smallexample -+@node PowerPC Variable Attributes - @subsection PowerPC Variable Attributes + @noindent +-Here, @code{t4} takes up 4 bytes. +-@end enumerate ++In this code, @code{C::C} is exported from the current DLL, but the ++virtual table for @code{C} is not exported. (You can use ++@code{__attribute__} instead of @code{__declspec} if you prefer, but ++most Symbian OS code uses @code{__declspec}.) - Three attributes currently are defined for PowerPC configurations: -@@ -5556,6 +6014,7 @@ documentation in @ref{x86 Variable Attributes}. - For documentation of @code{altivec} attribute please see the - documentation in @ref{PowerPC Type Attributes}. 
+-@item Zero-length bit-fields following non-bit-field members are ignored: ++@node MeP Type Attributes ++@subsection MeP Type Attributes -+@node SPU Variable Attributes - @subsection SPU Variable Attributes +-@smallexample +-struct +- @{ +- char foo; +- long : 0; +- char bar; +- @} t5; +-@end smallexample ++@cindex @code{based} type attribute, MeP ++@cindex @code{tiny} type attribute, MeP ++@cindex @code{near} type attribute, MeP ++@cindex @code{far} type attribute, MeP ++Many of the MeP variable attributes may be applied to types as well. ++Specifically, the @code{based}, @code{tiny}, @code{near}, and ++@code{far} attributes may be applied to either. The @code{io} and ++@code{cb} attributes may not be applied to types. - @cindex @code{spu_vector} variable attribute, SPU -@@ -5563,7 +6022,7 @@ The SPU supports the @code{spu_vector} attribute for variables. For - documentation of this attribute please see the documentation in - @ref{SPU Type Attributes}. +-@noindent +-Here, @code{t5} takes up 2 bytes. +-@end enumerate +-@end table ++@node PowerPC Type Attributes ++@subsection PowerPC Type Attributes --@anchor{x86 Variable Attributes} -+@node x86 Variable Attributes - @subsection x86 Variable Attributes +-@subsection Xstormy16 Variable Attributes ++Three attributes currently are defined for PowerPC configurations: ++@code{altivec}, @code{ms_struct} and @code{gcc_struct}. + +-One attribute is currently defined for xstormy16 configurations: +-@code{below100}. ++@cindex @code{ms_struct} type attribute, PowerPC ++@cindex @code{gcc_struct} type attribute, PowerPC ++For full documentation of the @code{ms_struct} and @code{gcc_struct} ++attributes please see the documentation in @ref{x86 Type Attributes}. - Two attributes are currently defined for x86 configurations: -@@ -5701,6 +6160,7 @@ Here, @code{t5} takes up 2 bytes. - @end enumerate - @end table +-@table @code +-@item below100 +-@cindex @code{below100} variable attribute, Xstormy16 ++@cindex @code{altivec} type attribute, PowerPC ++The @code{altivec} attribute allows one to declare AltiVec vector data ++types supported by the AltiVec Programming Interface Manual. The ++attribute requires an argument to specify one of three vector types: ++@code{vector__}, @code{pixel__} (always followed by unsigned short), ++and @code{bool__} (always followed by unsigned). + +-If a variable has the @code{below100} attribute (@code{BELOW100} is +-allowed also), GCC places the variable in the first 0x100 bytes of +-memory and use special opcodes to access it. Such variables are +-placed in either the @code{.bss_below100} section or the +-@code{.data_below100} section. ++@smallexample ++__attribute__((altivec(vector__))) ++__attribute__((altivec(pixel__))) unsigned short ++__attribute__((altivec(bool__))) unsigned ++@end smallexample -+@node Xstormy16 Variable Attributes - @subsection Xstormy16 Variable Attributes +-@end table ++These attributes mainly are intended to support the @code{__vector}, ++@code{__pixel}, and @code{__bool} AltiVec keywords. 
- One attribute is currently defined for xstormy16 configurations: -@@ -5724,33 +6184,39 @@ placed in either the @code{.bss_below100} section or the - @cindex type attributes +-@node Type Attributes +-@section Specifying Attributes of Types +-@cindex attribute of types +-@cindex type attributes ++@node SPU Type Attributes ++@subsection SPU Type Attributes - The keyword @code{__attribute__} allows you to specify special +-The keyword @code{__attribute__} allows you to specify special -attributes of @code{struct} and @code{union} types when you define -such types. This keyword is followed by an attribute specification -inside double parentheses. Eight attributes are currently defined for -types: @code{aligned}, @code{packed}, @code{transparent_union}, -@code{unused}, @code{deprecated}, @code{visibility}, @code{may_alias} -and @code{bnd_variable_size}. Other attributes are defined for -+attributes of types. Some type attributes apply only to @code{struct} -+and @code{union} types, while others can apply to any type defined -+via a @code{typedef} declaration. Other attributes are defined for - functions (@pxref{Function Attributes}), labels (@pxref{Label - Attributes}) and for variables (@pxref{Variable Attributes}). +-functions (@pxref{Function Attributes}), labels (@pxref{Label +-Attributes}) and for variables (@pxref{Variable Attributes}). ++@cindex @code{spu_vector} type attribute, SPU ++The SPU supports the @code{spu_vector} attribute for types. This attribute ++allows one to declare vector data types supported by the Sony/Toshiba/IBM SPU ++Language Extensions Specification. It is intended to support the ++@code{__vector} keyword. -You may also specify any one of these attributes with @samp{__} -preceding and following its keyword. This allows you to use these -attributes in header files without being concerned about a possible -macro of the same name. For example, you may use @code{__aligned__} -instead of @code{aligned}. -+The @code{__attribute__} keyword is followed by an attribute specification -+inside double parentheses. ++@node x86 Type Attributes ++@subsection x86 Type Attributes - You may specify type attributes in an enum, struct or union type +-You may specify type attributes in an enum, struct or union type -declaration or definition, or for other types in a @code{typedef} -declaration. -- ++Two attributes are currently defined for x86 configurations: ++@code{ms_struct} and @code{gcc_struct}. + -For an enum, struct or union type, you may specify attributes either -between the enum, struct or union tag and the name of the type, or -just past the closing curly brace of the @emph{definition}. The -former syntax is preferred. -+declaration or definition by placing them immediately after the -+@code{struct}, @code{union} or @code{enum} keyword. A less preferred -+syntax is to place them just past the closing curly brace of the -+definition. - -+You can also include type attributes in a @code{typedef} declaration. - @xref{Attribute Syntax}, for details of the exact syntax for using - attributes. ++@table @code -+@menu -+* Common Type Attributes:: -+* ARM Type Attributes:: -+* MeP Type Attributes:: -+* PowerPC Type Attributes:: -+* SPU Type Attributes:: -+* x86 Type Attributes:: -+@end menu -+ -+@node Common Type Attributes -+@subsection Common Type Attributes -+ -+The following type attributes are supported on most targets. 
-+ - @table @code - @cindex @code{aligned} type attribute - @item aligned (@var{alignment}) -@@ -5831,14 +6297,6 @@ up to a maximum of 8-byte alignment, then specifying @code{aligned(16)} - in an @code{__attribute__} still only provides you with 8-byte - alignment. See your linker documentation for further information. +-@xref{Attribute Syntax}, for details of the exact syntax for using +-attributes. ++@item ms_struct ++@itemx gcc_struct ++@cindex @code{ms_struct} type attribute, x86 ++@cindex @code{gcc_struct} type attribute, x86 + +-@table @code +-@cindex @code{aligned} type attribute +-@item aligned (@var{alignment}) +-This attribute specifies a minimum alignment (in bytes) for variables +-of the specified type. For example, the declarations: ++If @code{packed} is used on a structure, or if bit-fields are used ++it may be that the Microsoft ABI packs them differently ++than GCC normally packs them. Particularly when moving packed ++data between functions compiled with GCC and the native Microsoft compiler ++(either via function call or as data in a file), it may be necessary to access ++either format. + +-@smallexample +-struct S @{ short f[3]; @} __attribute__ ((aligned (8))); +-typedef int more_aligned_int __attribute__ ((aligned (8))); +-@end smallexample ++Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 ++compilers to match the native Microsoft compiler. ++@end table + +-@noindent +-force the compiler to ensure (as far as it can) that each variable whose +-type is @code{struct S} or @code{more_aligned_int} is allocated and +-aligned @emph{at least} on a 8-byte boundary. On a SPARC, having all +-variables of type @code{struct S} aligned to 8-byte boundaries allows +-the compiler to use the @code{ldd} and @code{std} (doubleword load and +-store) instructions when copying one variable of type @code{struct S} to +-another, thus improving run-time efficiency. ++@node Label Attributes ++@section Label Attributes ++@cindex Label Attributes + +-Note that the alignment of any given @code{struct} or @code{union} type +-is required by the ISO C standard to be at least a perfect multiple of +-the lowest common multiple of the alignments of all of the members of +-the @code{struct} or @code{union} in question. This means that you @emph{can} +-effectively adjust the alignment of a @code{struct} or @code{union} +-type by attaching an @code{aligned} attribute to any one of the members +-of such a type, but the notation illustrated in the example above is a +-more obvious, intuitive, and readable way to request the compiler to +-adjust the alignment of an entire @code{struct} or @code{union} type. ++GCC allows attributes to be set on C labels. @xref{Attribute Syntax}, for ++details of the exact syntax for using attributes. Other attributes are ++available for functions (@pxref{Function Attributes}), variables ++(@pxref{Variable Attributes}) and for types (@pxref{Type Attributes}). + +-As in the preceding example, you can explicitly specify the alignment +-(in bytes) that you wish the compiler to use for a given @code{struct} +-or @code{union} type. Alternatively, you can leave out the alignment factor +-and just ask the compiler to align a type to the maximum +-useful alignment for the target machine you are compiling for. 
For +-example, you could write: ++This example uses the @code{cold} label attribute to indicate the ++@code{ErrorHandling} branch is unlikely to be taken and that the ++@code{ErrorHandling} label is unused: + + @smallexample +-struct S @{ short f[3]; @} __attribute__ ((aligned)); +-@end smallexample + +-Whenever you leave out the alignment factor in an @code{aligned} +-attribute specification, the compiler automatically sets the alignment +-for the type to the largest alignment that is ever used for any data +-type on the target machine you are compiling for. Doing this can often +-make copy operations more efficient, because the compiler can use +-whatever instructions copy the biggest chunks of memory when performing +-copies to or from the variables that have types that you have aligned +-this way. ++ asm goto ("some asm" : : : : NoError); + +-In the example above, if the size of each @code{short} is 2 bytes, then +-the size of the entire @code{struct S} type is 6 bytes. The smallest +-power of two that is greater than or equal to that is 8, so the +-compiler sets the alignment for the entire @code{struct S} type to 8 +-bytes. ++/* This branch (the fall-through from the asm) is less commonly used */ ++ErrorHandling: ++ __attribute__((cold, unused)); /* Semi-colon is required here */ ++ printf("error\n"); ++ return 0; + +-Note that although you can ask the compiler to select a time-efficient +-alignment for a given type and then declare only individual stand-alone +-objects of that type, the compiler's ability to select a time-efficient +-alignment is primarily useful only when you plan to create arrays of +-variables having the relevant (efficiently aligned) type. If you +-declare or use arrays of variables of an efficiently-aligned type, then +-it is likely that your program also does pointer arithmetic (or +-subscripting, which amounts to the same thing) on pointers to the +-relevant type, and the code that the compiler generates for these +-pointer arithmetic operations is often more efficient for +-efficiently-aligned types than for other types. ++NoError: ++ printf("no error\n"); ++ return 1; ++@end smallexample + +-The @code{aligned} attribute can only increase the alignment; but you +-can decrease it by specifying @code{packed} as well. See below. ++@table @code ++@item unused ++@cindex @code{unused} label attribute ++This feature is intended for program-generated code that may contain ++unused labels, but which is compiled with @option{-Wall}. It is ++not normally appropriate to use in it human-written code, though it ++could be useful in cases where the code that jumps to the label is ++contained within an @code{#ifdef} conditional. + +-Note that the effectiveness of @code{aligned} attributes may be limited +-by inherent limitations in your linker. On many systems, the linker is +-only able to arrange for variables to be aligned up to a certain maximum +-alignment. (For some linkers, the maximum supported alignment may +-be very very small.) If your linker is only able to align variables +-up to a maximum of 8-byte alignment, then specifying @code{aligned(16)} +-in an @code{__attribute__} still only provides you with 8-byte +-alignment. See your linker documentation for further information. ++@item hot ++@cindex @code{hot} label attribute ++The @code{hot} attribute on a label is used to inform the compiler that ++the path following the label is more likely than paths that are not so ++annotated. 
This attribute is used in cases where @code{__builtin_expect} ++cannot be used, for instance with computed goto or @code{asm goto}. -@item packed -@cindex @code{packed} type attribute @@ -4440,21 +5942,86 @@ -of the structure or union is placed to minimize the memory required. When -attached to an @code{enum} definition, it indicates that the smallest -integral type should be used. -- - @opindex fshort-enums - Specifying this attribute for @code{struct} and @code{union} types is - equivalent to specifying the @code{packed} attribute on each of the -@@ -5870,78 +6328,38 @@ You may only specify this attribute on the definition of an @code{enum}, - @code{struct} or @code{union}, not on a @code{typedef} that does not - also define the enumerated type, structure or union. ++@item cold ++@cindex @code{cold} label attribute ++The @code{cold} attribute on labels is used to inform the compiler that ++the path following the label is unlikely to be executed. This attribute ++is used in cases where @code{__builtin_expect} cannot be used, for instance ++with computed goto or @code{asm goto}. + +-@opindex fshort-enums +-Specifying this attribute for @code{struct} and @code{union} types is +-equivalent to specifying the @code{packed} attribute on each of the +-structure or union members. Specifying the @option{-fshort-enums} +-flag on the line is equivalent to specifying the @code{packed} +-attribute on all @code{enum} definitions. ++@end table + +-In the following example @code{struct my_packed_struct}'s members are +-packed closely together, but the internal layout of its @code{s} member +-is not packed---to do that, @code{struct my_unpacked_struct} needs to +-be packed too. ++@node Attribute Syntax ++@section Attribute Syntax ++@cindex attribute syntax + +-@smallexample +-struct my_unpacked_struct +- @{ +- char c; +- int i; +- @}; ++This section describes the syntax with which @code{__attribute__} may be ++used, and the constructs to which attribute specifiers bind, for the C ++language. Some details may vary for C++ and Objective-C@. Because of ++infelicities in the grammar for attributes, some forms described here ++may not be successfully parsed in all cases. + +-struct __attribute__ ((__packed__)) my_packed_struct +- @{ +- char c; +- int i; +- struct my_unpacked_struct s; +- @}; +-@end smallexample ++There are some problems with the semantics of attributes in C++. For ++example, there are no manglings for attributes, although they may affect ++code generation, so problems may arise when attributed types are used in ++conjunction with templates or overloading. Similarly, @code{typeid} ++does not distinguish between types with different attributes. Support ++for attributes in C++ may be restricted in future to attributes on ++declarations only, but not on nested declarators. ++ ++@xref{Function Attributes}, for details of the semantics of attributes ++applying to functions. @xref{Variable Attributes}, for details of the ++semantics of attributes applying to variables. @xref{Type Attributes}, ++for details of the semantics of attributes applying to structure, union ++and enumerated types. ++@xref{Label Attributes}, for details of the semantics of attributes ++applying to labels. + +-You may only specify this attribute on the definition of an @code{enum}, +-@code{struct} or @code{union}, not on a @code{typedef} that does not +-also define the enumerated type, structure or union. ++An @dfn{attribute specifier} is of the form ++@code{__attribute__ ((@var{attribute-list}))}. 
An @dfn{attribute list} ++is a possibly empty comma-separated sequence of @dfn{attributes}, where ++each attribute is one of the following: -@item transparent_union -@cindex @code{transparent_union} type attribute -- ++@itemize @bullet ++@item ++Empty. Empty attributes are ignored. + -This attribute, attached to a @code{union} type definition, indicates -that any function parameter having that union type causes calls to that -function to be treated in a special way. -- ++@item ++An attribute name ++(which may be an identifier such as @code{unused}, or a reserved ++word such as @code{const}). + -First, the argument corresponding to a transparent union type can be of -any type in the union; no cast is required. Also, if the union contains -a pointer type, the corresponding argument can be a null pointer @@ -4463,13 +6030,20 @@ -If the union member type is a pointer, qualifiers like @code{const} on -the referenced type must be respected, just as with normal pointer -conversions. -- ++@item ++An attribute name followed by a parenthesized list of ++parameters for the attribute. ++These parameters take one of the following forms: + -Second, the argument is passed to the function using the calling -conventions of the first member of the transparent union, not the calling -conventions of the union itself. All members of the union must have the -same machine representation; this is necessary for this argument passing -to work properly. -- ++@itemize @bullet ++@item ++An identifier. For example, @code{mode} attributes use this form. + -Transparent unions are designed for library functions that have multiple -interfaces for compatibility reasons. For example, suppose the -@code{wait} function must accept either a value of type @code{int *} to @@ -4479,57 +6053,62 @@ -accept any other pointer type and this would make argument type checking -less useful. Instead, @code{} might define the interface -as follows: -- ++@item ++An identifier followed by a comma and a non-empty comma-separated list ++of expressions. For example, @code{format} attributes use this form. + -@smallexample -typedef union __attribute__ ((__transparent_union__)) - @{ - int *__ip; - union wait *__up; - @} wait_status_ptr_t; -- ++@item ++A possibly empty comma-separated list of expressions. For example, ++@code{format_arg} attributes use this form with the list being a single ++integer constant expression, and @code{alias} attributes use this form ++with the list being a single string constant. ++@end itemize ++@end itemize + -pid_t wait (wait_status_ptr_t); -@end smallexample -- ++An @dfn{attribute specifier list} is a sequence of one or more attribute ++specifiers, not separated by any other tokens. + -@noindent -This interface allows either @code{int *} or @code{union wait *} -arguments to be passed, using the @code{int *} calling convention. -The program can call @code{wait} with arguments of either type: -+@item bnd_variable_size -+@cindex @code{bnd_variable_size} type attribute -+@cindex Pointer Bounds Checker attributes -+When applied to a structure field, this attribute tells Pointer -+Bounds Checker that the size of this field should not be computed -+using static type information. It may be used to mark variably-sized -+static array fields placed at the end of a structure. ++You may optionally specify attribute names with @samp{__} ++preceding and following the name. ++This allows you to use them in header files without ++being concerned about a possible macro of the same name. 
For example, ++you may use the attribute name @code{__noreturn__} instead of @code{noreturn}. - @smallexample +-@smallexample -int w1 () @{ int w; return wait (&w); @} -int w2 () @{ union wait w; return wait (&w); @} -+struct S -+@{ -+ int size; -+ char data[1]; -+@} -+S *p = (S *)malloc (sizeof(S) + 100); -+p->data[10] = 0; //Bounds violation - @end smallexample +-@end smallexample - @noindent +-@noindent -With this interface, @code{wait}'s implementation might look like this: -+By using an attribute for the field we may avoid unwanted bound -+violation checks: ++@subsubheading Label Attributes - @smallexample +-@smallexample -pid_t wait (wait_status_ptr_t p) -+struct S - @{ +-@{ - return waitpid (-1, p.__ip, 0); -+ int size; -+ char data[1] __attribute__((bnd_variable_size)); - @} -+S *p = (S *)malloc (sizeof(S) + 100); -+p->data[10] = 0; //OK - @end smallexample +-@} +-@end smallexample ++In GNU C, an attribute specifier list may appear after the colon following a ++label, other than a @code{case} or @code{default} label. GNU C++ only permits ++attributes on labels if the attribute specifier is immediately ++followed by a semicolon (i.e., the label applies to an empty ++statement). If the semicolon is missing, C++ label attributes are ++ambiguous, as it is permissible for a declaration, which could begin ++with an attribute list, to be labelled in C++. Declarations cannot be ++labelled in C90 or C99, so the ambiguity does not arise there. -@item unused -@cindex @code{unused} type attribute @@ -4540,32 +6119,157 @@ -the case with lock or thread classes, which are usually defined and then -not referenced, but contain constructors and destructors that have -nontrivial bookkeeping functions. ++@subsubheading Type Attributes + +-@item deprecated +-@itemx deprecated (@var{msg}) +-@cindex @code{deprecated} type attribute +-The @code{deprecated} attribute results in a warning if the type +-is used anywhere in the source file. This is useful when identifying +-types that are expected to be removed in a future version of a program. +-If possible, the warning also includes the location of the declaration +-of the deprecated type, to enable users to easily find further +-information about why the type is deprecated, or what they should do +-instead. Note that the warnings only occur for uses and then only +-if the type is being applied to an identifier that itself is not being +-declared as deprecated. ++An attribute specifier list may appear as part of a @code{struct}, ++@code{union} or @code{enum} specifier. It may go either immediately ++after the @code{struct}, @code{union} or @code{enum} keyword, or after ++the closing brace. The former syntax is preferred. ++Where attribute specifiers follow the closing brace, they are considered ++to relate to the structure, union or enumerated type defined, not to any ++enclosing declaration the type specifier appears in, and the type ++defined is not complete until after the attribute specifiers. ++@c Otherwise, there would be the following problems: a shift/reduce ++@c conflict between attributes binding the struct/union/enum and ++@c binding to the list of specifiers/qualifiers; and "aligned" ++@c attributes could use sizeof for the structure, but the size could be ++@c changed later by "packed" attributes. 
+ +-@smallexample +-typedef int T1 __attribute__ ((deprecated)); +-T1 x; +-typedef T1 T2; +-T2 y; +-typedef T1 T3 __attribute__ ((deprecated)); +-T3 z __attribute__ ((deprecated)); +-@end smallexample + +-@noindent +-results in a warning on line 2 and 3 but not lines 4, 5, or 6. No +-warning is issued for line 4 because T2 is not explicitly +-deprecated. Line 5 has no warning because T3 is explicitly +-deprecated. Similarly for line 6. The optional @var{msg} +-argument, which must be a string, is printed in the warning if +-present. ++@subsubheading All other attributes + +-The @code{deprecated} attribute can also be used for functions and +-variables (@pxref{Function Attributes}, @pxref{Variable Attributes}.) ++Otherwise, an attribute specifier appears as part of a declaration, ++counting declarations of unnamed parameters and type names, and relates ++to that declaration (which may be nested in another declaration, for ++example in the case of a parameter declaration), or to a particular declarator ++within a declaration. Where an ++attribute specifier is applied to a parameter declared as a function or ++an array, it should apply to the function or array rather than the ++pointer to which the parameter is implicitly converted, but this is not ++yet correctly implemented. + +-@item may_alias +-@cindex @code{may_alias} type attribute +-Accesses through pointers to types with this attribute are not subject +-to type-based alias analysis, but are instead assumed to be able to alias +-any other type of objects. +-In the context of section 6.5 paragraph 7 of the C99 standard, +-an lvalue expression +-dereferencing such a pointer is treated like having a character type. +-See @option{-fstrict-aliasing} for more information on aliasing issues. +-This extension exists to support some vector APIs, in which pointers to +-one vector type are permitted to alias pointers to a different vector type. ++Any list of specifiers and qualifiers at the start of a declaration may ++contain attribute specifiers, whether or not such a list may in that ++context contain storage class specifiers. (Some attributes, however, ++are essentially in the nature of storage class specifiers, and only make ++sense where storage class specifiers may be used; for example, ++@code{section}.) There is one necessary limitation to this syntax: the ++first old-style parameter declaration in a function definition cannot ++begin with an attribute specifier, because such an attribute applies to ++the function instead by syntax described below (which, however, is not ++yet implemented in this case). In some other cases, attribute ++specifiers are permitted by this grammar but not yet supported by the ++compiler. All attribute specifiers in this place relate to the ++declaration as a whole. In the obsolescent usage where a type of ++@code{int} is implied by the absence of type specifiers, such a list of ++specifiers and qualifiers may be an attribute specifier list with no ++other specifiers or qualifiers. + +-Note that an object of a type with this attribute does not have any +-special semantics. ++At present, the first parameter in a function prototype must have some ++type specifier that is not an attribute specifier; this resolves an ++ambiguity in the interpretation of @code{void f(int ++(__attribute__((foo)) x))}, but is subject to change. 
At present, if ++the parentheses of a function declarator contain only attributes then ++those attributes are ignored, rather than yielding an error or warning ++or implying a single parameter of type int, but this is subject to ++change. + +-Example of use: ++An attribute specifier list may appear immediately before a declarator ++(other than the first) in a comma-separated list of declarators in a ++declaration of more than one identifier using a single list of ++specifiers and qualifiers. Such attribute specifiers apply ++only to the identifier before whose declarator they appear. For ++example, in + + @smallexample +-typedef short __attribute__((__may_alias__)) short_a; - - @item deprecated - @itemx deprecated (@var{msg}) - @cindex @code{deprecated} type attribute -@@ -5975,6 +6393,18 @@ present. - The @code{deprecated} attribute can also be used for functions and - variables (@pxref{Function Attributes}, @pxref{Variable Attributes}.) +-int +-main (void) +-@{ +- int a = 0x12345678; +- short_a *b = (short_a *) &a; ++__attribute__((noreturn)) void d0 (void), ++ __attribute__((format(printf, 1, 2))) d1 (const char *, ...), ++ d2 (void); ++@end smallexample -+@item designated_init -+@cindex @code{designated_init} type attribute -+This attribute may only be applied to structure types. It indicates -+that any initialization of an object of this type must use designated -+initializers rather than positional initializers. The intent of this -+attribute is to allow the programmer to indicate that a structure's -+layout may change, and that therefore relying on positional -+initialization will result in future breakage. -+ -+GCC emits warnings based on this attribute by default; use -+@option{-Wno-designated-init} to suppress them. -+ - @item may_alias - @cindex @code{may_alias} type attribute - Accesses through pointers to types with this attribute are not subject -@@ -6016,70 +6446,107 @@ declaration, the above program would abort when compiled with - @option{-fstrict-aliasing}, which is on by default at @option{-O2} or - above. +- b[1] = 0; ++@noindent ++the @code{noreturn} attribute applies to all the functions ++declared; the @code{format} attribute only applies to @code{d1}. + +- if (a == 0x12345678) +- abort(); ++An attribute specifier list may appear immediately before the comma, ++@code{=} or semicolon terminating the declaration of an identifier other ++than a function definition. Such attribute specifiers apply ++to the declared object or function. Where an ++assembler name for an object or function is specified (@pxref{Asm ++Labels}), the attribute must follow the @code{asm} ++specification. + +- exit(0); +-@} +-@end smallexample ++An attribute specifier list may, in future, be permitted to appear after ++the declarator in a function definition (before any old-style parameter ++declarations or the function body). + +-@noindent +-If you replaced @code{short_a} with @code{short} in the variable +-declaration, the above program would abort when compiled with +-@option{-fstrict-aliasing}, which is on by default at @option{-O2} or +-above. ++Attribute specifiers may be mixed with type qualifiers appearing inside ++the @code{[]} of a parameter array declarator, in the C99 construct by ++which such qualifiers are applied to the pointer to which the array is ++implicitly converted. Such attribute specifiers apply to the pointer, ++not to the array, but at present this is not implemented and they are ++ignored. 
-@item visibility -@cindex @code{visibility} type attribute @@ -4573,13 +6277,15 @@ -applied to class, struct, union and enum types. Unlike other type -attributes, the attribute must appear between the initial keyword and -the name of the type; it cannot appear after the body of the type. -+@item packed -+@cindex @code{packed} type attribute -+This attribute, attached to @code{struct} or @code{union} type -+definition, specifies that each member (other than zero-width bit-fields) -+of the structure or union is placed to minimize the memory required. When -+attached to an @code{enum} definition, it indicates that the smallest -+integral type should be used. ++An attribute specifier list may appear at the start of a nested ++declarator. At present, there are some limitations in this usage: the ++attributes correctly apply to the declarator, but for most individual ++attributes the semantics this implies are not implemented. ++When attribute specifiers follow the @code{*} of a pointer ++declarator, they may be mixed with any type qualifiers present. ++The following describes the formal semantics of this syntax. It makes the ++most sense if you are familiar with the formal specification of ++declarators in the ISO C standard. -Note that the type visibility is applied to vague linkage entities -associated with the class (vtable, typeinfo node, etc.). In @@ -4587,8 +6293,12 @@ -and caught in another, the class must have default visibility. -Otherwise the two shared objects are unable to use the same -typeinfo node and exception handling will break. -+@item transparent_union -+@cindex @code{transparent_union} type attribute ++Consider (as in C99 subclause 6.7.5 paragraph 4) a declaration @code{T ++D1}, where @code{T} contains declaration specifiers that specify a type ++@var{Type} (such as @code{int}) and @code{D1} is a declarator that ++contains an identifier @var{ident}. The type specified for @var{ident} ++for derived declarators whose type does not include an attribute ++specifier is as in the ISO C standard. -@item designated_init -@cindex @code{designated_init} type attribute @@ -4598,20 +6308,21 @@ -attribute is to allow the programmer to indicate that a structure's -layout may change, and that therefore relying on positional -initialization will result in future breakage. -+This attribute, attached to a @code{union} type definition, indicates -+that any function parameter having that union type causes calls to that -+function to be treated in a special way. ++If @code{D1} has the form @code{( @var{attribute-specifier-list} D )}, ++and the declaration @code{T D} specifies the type ++``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then ++@code{T D1} specifies the type ``@var{derived-declarator-type-list} ++@var{attribute-specifier-list} @var{Type}'' for @var{ident}. -GCC emits warnings based on this attribute by default; use -@option{-Wno-designated-init} to suppress them. -+First, the argument corresponding to a transparent union type can be of -+any type in the union; no cast is required. Also, if the union contains -+a pointer type, the corresponding argument can be a null pointer -+constant or a void pointer expression; and if the union contains a void -+pointer type, the corresponding argument can be any pointer expression. -+If the union member type is a pointer, qualifiers like @code{const} on -+the referenced type must be respected, just as with normal pointer -+conversions. 
++If @code{D1} has the form @code{* ++@var{type-qualifier-and-attribute-specifier-list} D}, and the ++declaration @code{T D} specifies the type ++``@var{derived-declarator-type-list} @var{Type}'' for @var{ident}, then ++@code{T D1} specifies the type ``@var{derived-declarator-type-list} ++@var{type-qualifier-and-attribute-specifier-list} pointer to @var{Type}'' for ++@var{ident}. -@item bnd_variable_size -@cindex @code{bnd_variable_size} type attribute @@ -4620,21 +6331,7 @@ -Bounds Checker that the size of this field should not be computed -using static type information. It may be used to mark variably-sized -static array fields placed at the end of a structure. -+Second, the argument is passed to the function using the calling -+conventions of the first member of the transparent union, not the calling -+conventions of the union itself. All members of the union must have the -+same machine representation; this is necessary for this argument passing -+to work properly. -+ -+Transparent unions are designed for library functions that have multiple -+interfaces for compatibility reasons. For example, suppose the -+@code{wait} function must accept either a value of type @code{int *} to -+comply with POSIX, or a value of type @code{union wait *} to comply with -+the 4.1BSD interface. If @code{wait}'s parameter were @code{void *}, -+@code{wait} would accept both kinds of arguments, but it would also -+accept any other pointer type and this would make argument type checking -+less useful. Instead, @code{} might define the interface -+as follows: ++For example, @smallexample -struct S @@ -4644,113 +6341,224 @@ -@} -S *p = (S *)malloc (sizeof(S) + 100); -p->data[10] = 0; //Bounds violation -+typedef union __attribute__ ((__transparent_union__)) -+ @{ -+ int *__ip; -+ union wait *__up; -+ @} wait_status_ptr_t; -+ -+pid_t wait (wait_status_ptr_t); ++void (__attribute__((noreturn)) ****f) (void); @end smallexample @noindent -By using an attribute for the field we may avoid unwanted bound -violation checks: -+This interface allows either @code{int *} or @code{union wait *} -+arguments to be passed, using the @code{int *} calling convention. -+The program can call @code{wait} with arguments of either type: ++specifies the type ``pointer to pointer to pointer to pointer to ++non-returning function returning @code{void}''. As another example, @smallexample -struct S -+int w1 () @{ int w; return wait (&w); @} -+int w2 () @{ union wait w; return wait (&w); @} -+@end smallexample -+ -+@noindent -+With this interface, @code{wait}'s implementation might look like this: -+ -+@smallexample -+pid_t wait (wait_status_ptr_t p) - @{ +-@{ - int size; - char data[1] __attribute__((bnd_variable_size)); -+ return waitpid (-1, p.__ip, 0); - @} +-@} -S *p = (S *)malloc (sizeof(S) + 100); -p->data[10] = 0; //OK ++char *__attribute__((aligned(8))) *f; @end smallexample -+@item unused -+@cindex @code{unused} type attribute -+When attached to a type (including a @code{union} or a @code{struct}), -+this attribute means that variables of that type are meant to appear -+possibly unused. GCC does not produce a warning for any variables of -+that type, even if the variable appears to do nothing. This is often -+the case with lock or thread classes, which are usually defined and then -+not referenced, but contain constructors and destructors that have -+nontrivial bookkeeping functions. 
-+ -+@item visibility -+@cindex @code{visibility} type attribute -+In C++, attribute visibility (@pxref{Function Attributes}) can also be -+applied to class, struct, union and enum types. Unlike other type -+attributes, the attribute must appear between the initial keyword and -+the name of the type; it cannot appear after the body of the type. -+ -+Note that the type visibility is applied to vague linkage entities -+associated with the class (vtable, typeinfo node, etc.). In -+particular, if a class is thrown as an exception in one shared object -+and caught in another, the class must have default visibility. -+Otherwise the two shared objects are unable to use the same -+typeinfo node and exception handling will break. -+ - @end table - - To specify multiple attributes, separate them by commas within the - double parentheses: for example, @samp{__attribute__ ((aligned (16), - packed))}. - -+@node ARM Type Attributes - @subsection ARM Type Attributes - - @cindex @code{notshared} type attribute, ARM -@@ -6105,7 +6572,7 @@ virtual table for @code{C} is not exported. (You can use - @code{__attribute__} instead of @code{__declspec} if you prefer, but - most Symbian OS code uses @code{__declspec}.) +-@end table ++@noindent ++specifies the type ``pointer to 8-byte-aligned pointer to @code{char}''. ++Note again that this does not work with most attributes; for example, ++the usage of @samp{aligned} and @samp{noreturn} attributes given above ++is not yet supported. + +-To specify multiple attributes, separate them by commas within the +-double parentheses: for example, @samp{__attribute__ ((aligned (16), +-packed))}. ++For compatibility with existing code written for compiler versions that ++did not implement attributes on nested declarators, some laxity is ++allowed in the placing of attributes. If an attribute that only applies ++to types is applied to a declaration, it is treated as applying to ++the type of that declaration. If an attribute that only applies to ++declarations is applied to the type of a declaration, it is treated ++as applying to that declaration; and, for compatibility with code ++placing the attributes immediately before the identifier declared, such ++an attribute applied to a function return type is treated as ++applying to the function type, and such an attribute applied to an array ++element type is treated as applying to the array type. If an ++attribute that only applies to function types is applied to a ++pointer-to-function type, it is treated as applying to the pointer ++target type; if such an attribute is applied to a function return type ++that is not a pointer-to-function type, it is treated as applying ++to the function type. + +-@subsection ARM Type Attributes ++@node Function Prototypes ++@section Prototypes and Old-Style Function Definitions ++@cindex function prototype declarations ++@cindex old-style function definitions ++@cindex promotion of formal parameters + +-@cindex @code{notshared} type attribute, ARM +-On those ARM targets that support @code{dllimport} (such as Symbian +-OS), you can use the @code{notshared} attribute to indicate that the +-virtual table and other similar data for a class should not be +-exported from a DLL@. For example: ++GNU C extends ISO C to allow a function prototype to override a later ++old-style non-prototype definition. 
Consider the following example: + @smallexample +-class __declspec(notshared) C @{ +-public: +- __declspec(dllimport) C(); +- virtual void f(); +-@} +- +-__declspec(dllexport) +-C::C() @{@} +-@end smallexample +- +-@noindent +-In this code, @code{C::C} is exported from the current DLL, but the +-virtual table for @code{C} is not exported. (You can use +-@code{__attribute__} instead of @code{__declspec} if you prefer, but +-most Symbian OS code uses @code{__declspec}.) +- -@anchor{MeP Type Attributes} -+@node MeP Type Attributes - @subsection MeP Type Attributes - - @cindex @code{based} type attribute, MeP -@@ -6117,7 +6584,7 @@ Specifically, the @code{based}, @code{tiny}, @code{near}, and - @code{far} attributes may be applied to either. The @code{io} and - @code{cb} attributes may not be applied to types. +-@subsection MeP Type Attributes +- +-@cindex @code{based} type attribute, MeP +-@cindex @code{tiny} type attribute, MeP +-@cindex @code{near} type attribute, MeP +-@cindex @code{far} type attribute, MeP +-Many of the MeP variable attributes may be applied to types as well. +-Specifically, the @code{based}, @code{tiny}, @code{near}, and +-@code{far} attributes may be applied to either. The @code{io} and +-@code{cb} attributes may not be applied to types. ++/* @r{Use prototypes unless the compiler is old-fashioned.} */ ++#ifdef __STDC__ ++#define P(x) x ++#else ++#define P(x) () ++#endif -@anchor{PowerPC Type Attributes} -+@node PowerPC Type Attributes - @subsection PowerPC Type Attributes +-@subsection PowerPC Type Attributes ++/* @r{Prototype function declaration.} */ ++int isroot P((uid_t)); + +-Three attributes currently are defined for PowerPC configurations: +-@code{altivec}, @code{ms_struct} and @code{gcc_struct}. ++/* @r{Old-style function definition.} */ ++int ++isroot (x) /* @r{??? lossage here ???} */ ++ uid_t x; ++@{ ++ return x == 0; ++@} ++@end smallexample + +-@cindex @code{ms_struct} type attribute, PowerPC +-@cindex @code{gcc_struct} type attribute, PowerPC +-For full documentation of the @code{ms_struct} and @code{gcc_struct} +-attributes please see the documentation in @ref{x86 Type Attributes}. ++Suppose the type @code{uid_t} happens to be @code{short}. ISO C does ++not allow this example, because subword arguments in old-style ++non-prototype definitions are promoted. Therefore in this example the ++function definition's argument is really an @code{int}, which does not ++match the prototype argument type of @code{short}. + +-@cindex @code{altivec} type attribute, PowerPC +-The @code{altivec} attribute allows one to declare AltiVec vector data +-types supported by the AltiVec Programming Interface Manual. The +-attribute requires an argument to specify one of three vector types: +-@code{vector__}, @code{pixel__} (always followed by unsigned short), +-and @code{bool__} (always followed by unsigned). ++This restriction of ISO C makes it hard to write code that is portable ++to traditional C compilers, because the programmer does not know ++whether the @code{uid_t} type is @code{short}, @code{int}, or ++@code{long}. Therefore, in cases like these GNU C allows a prototype ++to override a later old-style definition. More precisely, in GNU C, a ++function prototype argument type overrides the argument type specified ++by a later old-style definition if the former type is the same as the ++latter type before promotion. 
Thus in GNU C the above example is ++equivalent to the following: - Three attributes currently are defined for PowerPC configurations: -@@ -6144,7 +6611,7 @@ __attribute__((altivec(bool__))) unsigned - These attributes mainly are intended to support the @code{__vector}, - @code{__pixel}, and @code{__bool} AltiVec keywords. + @smallexample +-__attribute__((altivec(vector__))) +-__attribute__((altivec(pixel__))) unsigned short +-__attribute__((altivec(bool__))) unsigned +-@end smallexample +- +-These attributes mainly are intended to support the @code{__vector}, +-@code{__pixel}, and @code{__bool} AltiVec keywords. ++int isroot (uid_t); -@anchor{SPU Type Attributes} -+@node SPU Type Attributes - @subsection SPU Type Attributes +-@subsection SPU Type Attributes ++int ++isroot (uid_t x) ++@{ ++ return x == 0; ++@} ++@end smallexample - @cindex @code{spu_vector} type attribute, SPU -@@ -6153,7 +6620,7 @@ allows one to declare vector data types supported by the Sony/Toshiba/IBM SPU - Language Extensions Specification. It is intended to support the - @code{__vector} keyword. +-@cindex @code{spu_vector} type attribute, SPU +-The SPU supports the @code{spu_vector} attribute for types. This attribute +-allows one to declare vector data types supported by the Sony/Toshiba/IBM SPU +-Language Extensions Specification. It is intended to support the +-@code{__vector} keyword. ++@noindent ++GNU C++ does not support old-style function definitions, so this ++extension is irrelevant. -@anchor{x86 Type Attributes} -+@node x86 Type Attributes - @subsection x86 Type Attributes +-@subsection x86 Type Attributes ++@node C++ Comments ++@section C++ Style Comments ++@cindex @code{//} ++@cindex C++ comments ++@cindex comments, C++ style + +-Two attributes are currently defined for x86 configurations: +-@code{ms_struct} and @code{gcc_struct}. ++In GNU C, you may use C++ style comments, which start with @samp{//} and ++continue until the end of the line. Many other C implementations allow ++such comments, and they are included in the 1999 C standard. However, ++C++ style comments are not recognized if you specify an @option{-std} ++option specifying a version of ISO C before C99, or @option{-ansi} ++(equivalent to @option{-std=c90}). - Two attributes are currently defined for x86 configurations: -@@ -8213,15 +8680,19 @@ identifier, or a sequence of member accesses and array references. +-@table @code ++@node Dollar Signs ++@section Dollar Signs in Identifier Names ++@cindex $ ++@cindex dollar signs in identifier names ++@cindex identifier names, dollar signs in + +-@item ms_struct +-@itemx gcc_struct +-@cindex @code{ms_struct} type attribute, x86 +-@cindex @code{gcc_struct} type attribute, x86 ++In GNU C, you may normally use dollar signs in identifier names. ++This is because many traditional C implementations allow such identifiers. ++However, dollar signs in identifiers are not supported on a few target ++machines, typically because the target assembler does not allow them. + +-If @code{packed} is used on a structure, or if bit-fields are used +-it may be that the Microsoft ABI packs them differently +-than GCC normally packs them. Particularly when moving packed +-data between functions compiled with GCC and the native Microsoft compiler +-(either via function call or as data in a file), it may be necessary to access +-either format. 
++@node Character Escapes ++@section The Character @key{ESC} in Constants + +-Currently @option{-m[no-]ms-bitfields} is provided for the Microsoft Windows x86 +-compilers to match the native Microsoft compiler. +-@end table ++You can use the sequence @samp{\e} in a string or character constant to ++stand for the ASCII character @key{ESC}. + + @node Alignment + @section Inquiring on Alignment of Types or Variables +@@ -8213,15 +8702,19 @@ identifier, or a sequence of member accesses and array references. The following built-in functions are intended to be compatible with those described in the @cite{Intel Itanium Processor-specific Application Binary Interface}, @@ -4774,7 +6582,7 @@ Not all operations are supported by all target processors. If a particular operation cannot be implemented on the target processor, a warning is generated and a call to an external function is generated. The external -@@ -8243,11 +8714,10 @@ after the operation. +@@ -8243,11 +8736,10 @@ after the operation. All of the routines are described in the Intel documentation to take ``an optional list of variables protected by the memory barrier''. It's not clear what is meant by that; it could mean that @emph{only} the @@ -4790,7 +6598,7 @@ @table @code @item @var{type} __sync_fetch_and_add (@var{type} *ptr, @var{type} value, ...) -@@ -8350,45 +8820,47 @@ are not prevented from being speculated to before the barrier. +@@ -8350,45 +8842,47 @@ are not prevented from being speculated to before the barrier. @node __atomic Builtins @section Built-in Functions for Memory Model Aware Atomic Operations @@ -4863,7 +6671,7 @@ @table @code @item __ATOMIC_RELAXED -@@ -8403,13 +8875,32 @@ semantic stores from another thread. +@@ -8403,13 +8897,32 @@ semantic stores from another thread. Barrier to sinking of code and synchronizes with acquire (or stronger) semantic loads from another thread. @item __ATOMIC_ACQ_REL @@ -4898,7 +6706,7 @@ When implementing patterns for these built-in functions, the memory model parameter can be ignored as long as the pattern implements the most restrictive @code{__ATOMIC_SEQ_CST} model. Any of the other memory models -@@ -8480,19 +8971,20 @@ of @code{*@var{ptr}} is copied into @code{*@var{ret}}. +@@ -8480,19 +8993,20 @@ of @code{*@var{ptr}} is copied into @code{*@var{ret}}. @deftypefn {Built-in Function} bool __atomic_compare_exchange_n (@var{type} *ptr, @var{type} *expected, @var{type} desired, bool weak, int success_memmodel, int failure_memmodel) This built-in function implements an atomic compare and exchange operation. This compares the contents of @code{*@var{ptr}} with the contents of @@ -4957,49 +6765,175 @@ -tno-android-cc -tno-android-ld} @emph{H8/300 Options} -@@ -12326,8 +12326,12 @@ corresponding flag to the linker. +@@ -12259,7 +12259,10 @@ Generate big-endian code. This is the default when GCC is configured for an + + @item -mgeneral-regs-only + @opindex mgeneral-regs-only +-Generate code which uses only the general registers. ++Generate code which uses only the general-purpose registers. This is equivalent ++to feature modifier @option{nofp} of @option{-march} or @option{-mcpu}, except ++that @option{-mgeneral-regs-only} takes precedence over any conflicting feature ++modifier regardless of sequence. + + @item -mlittle-endian + @opindex mlittle-endian +@@ -12326,20 +12329,26 @@ corresponding flag to the linker. + @opindex march Specify the name of the target architecture, optionally suffixed by one or more feature modifiers. 
This option has the form - @option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the +-@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the -only permissible value for @var{arch} is @samp{armv8-a}. The permissible -values for @var{feature} are documented in the sub-section below. -+only permissible value for @var{arch} is @samp{armv8-a}. -+The permissible values for @var{feature} are documented in the sub-section -+below. Additionally on native AArch64 GNU/Linux systems the value ++@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}. + +-Where conflicting feature modifiers are specified, the right-most feature is +-used. ++The permissible values for @var{arch} are @samp{armv8-a} or ++@samp{armv8.1-a}. + +-GCC uses this name to determine what kind of instructions it can emit when +-generating assembly code. ++For the permissible values for @var{feature}, see the sub-section on ++@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} ++Feature Modifiers}. Where conflicting feature modifiers are ++specified, the right-most feature is used. + +-Where @option{-march} is specified without either of @option{-mtune} +-or @option{-mcpu} also being specified, the code is tuned to perform +-well across a range of target processors implementing the target +-architecture. ++Additionally on native AArch64 GNU/Linux systems the value +@samp{native} is available. This option causes the compiler to pick the +architecture of the host system. If the compiler is unable to recognize the +architecture of the host system this option has no effect. - - Where conflicting feature modifiers are specified, the right-most feature is - used. -@@ -12351,6 +12355,13 @@ Additionally, this option can specify that GCC should tune the performance ++ ++GCC uses @var{name} to determine what kind of instructions it can emit ++when generating assembly code. If @option{-march} is specified ++without either of @option{-mtune} or @option{-mcpu} also being ++specified, the code is tuned to perform well across a range of target ++processors implementing the target architecture. + + @item -mtune=@var{name} + @opindex mtune +@@ -12352,6 +12361,12 @@ Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. Permissible values for this option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}. -+Additionally on native AArch64 GNU/Linux systems the value @samp{native} -+is available. -+This option causes the compiler to pick the architecture of and tune the -+performance of the code for the processor of the host system. -+If the compiler is unable to recognize the processor of the host system -+this option has no effect. ++Additionally on native AArch64 GNU/Linux systems the value ++@samp{native} is available. This option causes the compiler to pick ++the architecture of and tune the performance of the code for the ++processor of the host system. If the compiler is unable to recognize ++the processor of the host system this option has no effect. + Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=} are specified, the code is tuned to perform well across a range of target processors. -@@ -12363,7 +12374,11 @@ Specify the name of the target processor, optionally suffixed by one or more - feature modifiers. 
This option has the form - @option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the - permissible values for @var{cpu} are the same as those available for +@@ -12360,45 +12375,75 @@ This option cannot be suffixed by feature modifiers. + + @item -mcpu=@var{name} + @opindex mcpu +-Specify the name of the target processor, optionally suffixed by one or more +-feature modifiers. This option has the form +-@option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the +-permissible values for @var{cpu} are the same as those available for -@option{-mtune}. -+@option{-mtune}. Additionally on native AArch64 GNU/Linux systems the -+value @samp{native} is available. -+This option causes the compiler to tune the performance of the code for the -+processor of the host system. If the compiler is unable to recognize the -+processor of the host system this option has no effect. - - The permissible values for @var{feature} are documented in the sub-section - below. -@@ -13207,9 +13222,9 @@ Permissible names are: @samp{arm2}, @samp{arm250}, +- +-The permissible values for @var{feature} are documented in the sub-section +-below. +- +-Where conflicting feature modifiers are specified, the right-most feature is +-used. +- +-GCC uses this name to determine what kind of instructions it can emit when ++Specify the name of the target processor, optionally suffixed by one ++or more feature modifiers. This option has the form ++@option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where ++the permissible values for @var{cpu} are the same as those available ++for @option{-mtune}. The permissible values for @var{feature} are ++documented in the sub-section on ++@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} ++Feature Modifiers}. Where conflicting feature modifiers are ++specified, the right-most feature is used. ++ ++Additionally on native AArch64 GNU/Linux systems the value ++@samp{native} is available. This option causes the compiler to tune ++the performance of the code for the processor of the host system. If ++the compiler is unable to recognize the processor of the host system ++this option has no effect. ++ ++GCC uses @var{name} to determine what kind of instructions it can emit when + generating assembly code (as if by @option{-march}) and to determine + the target processor for which to tune for performance (as if + by @option{-mtune}). Where this option is used in conjunction + with @option{-march} or @option{-mtune}, those options take precedence + over the appropriate part of this option. ++ ++@item -moverride=@var{string} ++@opindex moverride ++Override tuning decisions made by the back-end in response to a ++@option{-mtune=} switch. The syntax, semantics, and accepted values ++for @var{string} in this option are not guaranteed to be consistent ++across releases. ++ ++This option is only intended to be useful when developing GCC. + @end table + + @subsubsection @option{-march} and @option{-mcpu} Feature Modifiers ++@anchor{aarch64-feature-modifiers} + @cindex @option{-march} feature modifiers + @cindex @option{-mcpu} feature modifiers +-Feature modifiers used with @option{-march} and @option{-mcpu} can be one +-the following: ++Feature modifiers used with @option{-march} and @option{-mcpu} can be any of ++the following and their inverses @option{no@var{feature}}: + + @table @samp + @item crc + Enable CRC extension. + @item crypto +-Enable Crypto extension. This implies Advanced SIMD is enabled. ++Enable Crypto extension. 
This also enables Advanced SIMD and floating-point ++instructions. + @item fp +-Enable floating-point instructions. ++Enable floating-point instructions. This is on by default for all possible ++values for options @option{-march} and @option{-mcpu}. + @item simd +-Enable Advanced SIMD instructions. This implies floating-point instructions +-are enabled. This is the default for all current possible values for options +-@option{-march} and @option{-mcpu=}. ++Enable Advanced SIMD instructions. This also enables floating-point ++instructions. This is on by default for all possible values for options ++@option{-march} and @option{-mcpu}. ++@item lse ++Enable Large System Extension instructions. ++@item pan ++Enable Privileged Access Never support. ++@item lor ++Enable Limited Ordering Regions support. ++@item rdma ++Enable ARMv8.1 Advanced SIMD instructions. This implies Advanced SIMD ++is enabled. ++ + @end table + ++That is, @option{crypto} implies @option{simd} implies @option{fp}. ++Conversely, @option{nofp} (or equivalently, @option{-mgeneral-regs-only}) ++implies @option{nosimd} implies @option{nocrypto}. ++ + @node Adapteva Epiphany Options + @subsection Adapteva Epiphany Options + +@@ -13169,7 +13214,7 @@ of the @option{-mcpu=} option. Permissible names are: @samp{armv2}, + @samp{armv2a}, @samp{armv3}, @samp{armv3m}, @samp{armv4}, @samp{armv4t}, + @samp{armv5}, @samp{armv5t}, @samp{armv5e}, @samp{armv5te}, + @samp{armv6}, @samp{armv6j}, +-@samp{armv6t2}, @samp{armv6z}, @samp{armv6zk}, @samp{armv6-m}, ++@samp{armv6t2}, @samp{armv6z}, @samp{armv6kz}, @samp{armv6-m}, + @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, @samp{armv7e-m}, + @samp{armv7ve}, @samp{armv8-a}, @samp{armv8-a+crc}, + @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. +@@ -13208,9 +13253,9 @@ Permissible names are: @samp{arm2}, @samp{arm250}, @samp{arm10e}, @samp{arm1020e}, @samp{arm1022e}, @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, @@ -5012,7 +6946,7 @@ @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, @samp{cortex-r7}, @samp{cortex-m7}, @samp{cortex-m4}, -@@ -13229,8 +13244,8 @@ Permissible names are: @samp{arm2}, @samp{arm250}, +@@ -13230,8 +13275,8 @@ Permissible names are: @samp{arm2}, @samp{arm250}, Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. Permissible names are: @@ -5023,7 +6957,18 @@ @option{-mtune=generic-@var{arch}} specifies that GCC should tune the performance for a blend of processors within architecture @var{arch}. -@@ -15324,13 +15339,19 @@ These @samp{-m} options are defined for GNU/Linux targets: +@@ -13397,6 +13442,10 @@ that executes in ARM state, but the default can be changed by + configuring GCC with the @option{--with-mode=}@var{state} + configure option. + ++You can also override the ARM and Thumb mode for each function ++by using the @code{target("thumb")} and @code{target("arm")} function attributes ++(@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). ++ + @item -mtpcs-frame + @opindex mtpcs-frame + Generate a stack frame that is compliant with the Thumb Procedure Call +@@ -15325,13 +15374,19 @@ These @samp{-m} options are defined for GNU/Linux targets: @item -mglibc @opindex mglibc Use the GNU C library. 
This is the default except @@ -5063,6 +7008,18 @@ @deftypefnx {GIMPLE function} tree gimple_convert (gimple_seq *, location_t, tree, tree); @end deftypefn +--- a/src/gcc/doc/sourcebuild.texi ++++ b/src/gcc/doc/sourcebuild.texi +@@ -1695,6 +1695,9 @@ Target supports FPU instructions. + @item non_strict_align + Target does not require strict alignment. + ++@item sqrt_insn ++Target has a square root instruction that the compiler can generate. ++ + @item sse + Target supports compiling @code{sse} instructions. + --- a/src/gcc/doc/tm.texi +++ b/src/gcc/doc/tm.texi @@ -9789,7 +9789,7 @@ be documented in @file{extend.texi}. diff -u gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff --- gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff +++ gcc-5-5.2.1/debian/patches/gcc-linaro-no-macros.diff @@ -88,8 +88,8 @@ =================================================================== --- a/src/gcc/LINARO-VERSION +++ /dev/null -@@ -1 +0,0 @@ --5.1-2015.07~dev +@@ -1,1 +0,0 @@ +-Snapshot 5.2-2015.10 Index: b/src/gcc/configure.ac =================================================================== --- a/src/gcc/configure.ac diff -u gcc-5-5.2.1/debian/patches/gcc-linaro.diff gcc-5-5.2.1/debian/patches/gcc-linaro.diff --- gcc-5-5.2.1/debian/patches/gcc-linaro.diff +++ gcc-5-5.2.1/debian/patches/gcc-linaro.diff @@ -1,6 +1,6 @@ -# DP: Changes for the Linaro 5-2015.09 release. +# DP: Changes for the Linaro 5-2015.10 release. -LANG=C git diff 2006973fa839ccbe189a1e7408400dc96ed880b4..ac19ac6481a3f326d9f41403f5dadab548b2c8a6 \ +LANG=C git diff 472e2599b141820b2a1565209528750de18731f8..5db159c220ec010ab6ae331802cddc242f83bb38 \ | egrep -v '^(diff|index) ' \ | filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ @@ -19,7 +19,7 @@ --- a/src//dev/null +++ b/src/gcc/LINARO-VERSION @@ -0,0 +1 @@ -+5.1-2015.07~dev ++Snapshot 5.2-2015.10 --- a/src/gcc/Makefile.in +++ b/src/gcc/Makefile.in @@ -527,10 +527,6 @@ xm_include_list=@xm_include_list@ @@ -87,6 +87,66 @@ ada.clean: ada.distclean: -$(RM) ada/Makefile +--- a/src/gcc/builtins.c ++++ b/src/gcc/builtins.c +@@ -5477,7 +5477,8 @@ expand_builtin_atomic_compare_exchange (machine_mode mode, tree exp, + the normal case where EXPECT is totally private, i.e. a register. At + which point the store can be unconditional. */ + label = gen_label_rtx (); +- emit_cmp_and_jump_insns (target, const0_rtx, NE, NULL, VOIDmode, 1, label); ++ emit_cmp_and_jump_insns (target, const0_rtx, NE, NULL, ++ GET_MODE (target), 1, label); + emit_move_insn (expect, oldval); + emit_label (label); + +--- a/src/gcc/c-family/c-common.h ++++ b/src/gcc/c-family/c-common.h +@@ -1084,6 +1084,8 @@ extern const unsigned char executable_checksum[16]; + /* In c-cppbuiltin.c */ + extern void builtin_define_std (const char *macro); + extern void builtin_define_with_value (const char *, const char *, int); ++extern void builtin_define_with_int_value (const char *, HOST_WIDE_INT); ++extern void builtin_define_type_sizeof (const char *, tree); + extern void c_stddef_cpp_builtins (void); + extern void fe_file_change (const struct line_map *); + extern void c_parse_error (const char *, enum cpp_ttype, tree, unsigned char); +--- a/src/gcc/c-family/c-cppbuiltin.c ++++ b/src/gcc/c-family/c-cppbuiltin.c +@@ -58,8 +58,6 @@ along with GCC; see the file COPYING3. If not see + #endif + + /* Non-static as some targets don't use it. 
*/ +-void builtin_define_std (const char *) ATTRIBUTE_UNUSED; +-static void builtin_define_with_int_value (const char *, HOST_WIDE_INT); + static void builtin_define_with_hex_fp_value (const char *, tree, + int, const char *, + const char *, +@@ -68,7 +66,6 @@ static void builtin_define_stdint_macros (void); + static void builtin_define_constants (const char *, tree); + static void builtin_define_type_max (const char *, tree); + static void builtin_define_type_minmax (const char *, const char *, tree); +-static void builtin_define_type_sizeof (const char *, tree); + static void builtin_define_float_constants (const char *, + const char *, + const char *, +@@ -113,7 +110,7 @@ mode_has_fma (machine_mode mode) + } + + /* Define NAME with value TYPE size_unit. */ +-static void ++void + builtin_define_type_sizeof (const char *name, tree type) + { + builtin_define_with_int_value (name, +@@ -1372,7 +1369,7 @@ builtin_define_with_value (const char *macro, const char *expansion, int is_str) + + + /* Pass an object-like macro and an integer value to define it to. */ +-static void ++void + builtin_define_with_int_value (const char *macro, HOST_WIDE_INT value) + { + char *buf; --- a/src/gcc/c/Make-lang.in +++ b/src/gcc/c/Make-lang.in @@ -95,6 +95,8 @@ c.srcman: @@ -98,6 +158,85 @@ # 'make check' in gcc/ looks for check-c. Redirect it to check-gcc. check-c : check-gcc +--- a/src/gcc/c/c-decl.c ++++ b/src/gcc/c/c-decl.c +@@ -2632,6 +2632,12 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) + else if (DECL_PRESERVE_P (newdecl)) + DECL_PRESERVE_P (olddecl) = 1; + ++ /* Merge DECL_COMMON */ ++ if (VAR_P (olddecl) && VAR_P (newdecl) ++ && !lookup_attribute ("common", DECL_ATTRIBUTES (newdecl)) ++ && !lookup_attribute ("nocommon", DECL_ATTRIBUTES (newdecl))) ++ DECL_COMMON (newdecl) = DECL_COMMON (newdecl) && DECL_COMMON (olddecl); ++ + /* Copy most of the decl-specific fields of NEWDECL into OLDDECL. + But preserve OLDDECL's DECL_UID, DECL_CONTEXT and + DECL_ARGUMENTS (if appropriate). */ +@@ -7524,12 +7530,23 @@ detect_field_duplicates (tree fieldlist) + /* Finish up struct info used by -Wc++-compat. */ + + static void +-warn_cxx_compat_finish_struct (tree fieldlist) ++warn_cxx_compat_finish_struct (tree fieldlist, enum tree_code code, ++ location_t record_loc) + { + unsigned int ix; + tree x; + struct c_binding *b; + ++ if (fieldlist == NULL_TREE) ++ { ++ if (code == RECORD_TYPE) ++ warning_at (record_loc, OPT_Wc___compat, ++ "empty struct has size 0 in C, size 1 in C++"); ++ else ++ warning_at (record_loc, OPT_Wc___compat, ++ "empty union has size 0 in C, size 1 in C++"); ++ } ++ + /* Set the C_TYPE_DEFINED_IN_STRUCT flag for each type defined in + the current struct. 
We do this now at the end of the struct + because the flag is used to issue visibility warnings, and we +@@ -7862,7 +7879,7 @@ finish_struct (location_t loc, tree t, tree fieldlist, tree attributes, + DECL_EXPR, build_decl (loc, TYPE_DECL, NULL, t))); + + if (warn_cxx_compat) +- warn_cxx_compat_finish_struct (fieldlist); ++ warn_cxx_compat_finish_struct (fieldlist, TREE_CODE (t), loc); + + struct_parse_info->struct_types.release (); + struct_parse_info->fields.release (); +--- a/src/gcc/cfgexpand.c ++++ b/src/gcc/cfgexpand.c +@@ -1382,7 +1382,16 @@ expand_one_var (tree var, bool toplevel, bool really_expand) + else + { + if (really_expand) +- expand_one_stack_var (origvar); ++ { ++ if (lookup_attribute ("naked", ++ DECL_ATTRIBUTES (current_function_decl))) ++ error ("cannot allocate stack for variable %q+D, naked function.", ++ var); ++ ++ expand_one_stack_var (origvar); ++ } ++ ++ + return tree_to_uhwi (DECL_SIZE_UNIT (var)); + } + return 0; +--- a/src/gcc/cgraphunit.c ++++ b/src/gcc/cgraphunit.c +@@ -2505,6 +2505,7 @@ cgraph_node::create_wrapper (cgraph_node *target) + memset (&thunk, 0, sizeof (cgraph_thunk_info)); + thunk.thunk_p = true; + create_edge (target, NULL, count, CGRAPH_FREQ_BASE); ++ callees->can_throw_external = !TREE_NOTHROW (target->decl); + + tree arguments = DECL_ARGUMENTS (decl); + --- a/src/gcc/combine.c +++ b/src/gcc/combine.c @@ -1650,6 +1650,73 @@ setup_incoming_promotions (rtx_insn *first) @@ -234,6 +373,110 @@ /* Don't eliminate a store in the stack pointer. */ if (dest == stack_pointer_rtx /* Don't combine with an insn that sets a register to itself if it has +@@ -5463,6 +5511,51 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest, + SUBST (XEXP (x, 1), temp); + } + ++ /* Try to fold this expression in case we have constants that weren't ++ present before. */ ++ temp = 0; ++ switch (GET_RTX_CLASS (code)) ++ { ++ case RTX_UNARY: ++ if (op0_mode == VOIDmode) ++ op0_mode = GET_MODE (XEXP (x, 0)); ++ temp = simplify_unary_operation (code, mode, XEXP (x, 0), op0_mode); ++ break; ++ case RTX_COMPARE: ++ case RTX_COMM_COMPARE: ++ { ++ machine_mode cmp_mode = GET_MODE (XEXP (x, 0)); ++ if (cmp_mode == VOIDmode) ++ { ++ cmp_mode = GET_MODE (XEXP (x, 1)); ++ if (cmp_mode == VOIDmode) ++ cmp_mode = op0_mode; ++ } ++ temp = simplify_relational_operation (code, mode, cmp_mode, ++ XEXP (x, 0), XEXP (x, 1)); ++ } ++ break; ++ case RTX_COMM_ARITH: ++ case RTX_BIN_ARITH: ++ temp = simplify_binary_operation (code, mode, XEXP (x, 0), XEXP (x, 1)); ++ break; ++ case RTX_BITFIELD_OPS: ++ case RTX_TERNARY: ++ temp = simplify_ternary_operation (code, mode, op0_mode, XEXP (x, 0), ++ XEXP (x, 1), XEXP (x, 2)); ++ break; ++ default: ++ break; ++ } ++ ++ if (temp) ++ { ++ x = temp; ++ code = GET_CODE (temp); ++ op0_mode = VOIDmode; ++ mode = GET_MODE (temp); ++ } ++ + /* If this is a simple operation applied to an IF_THEN_ELSE, try + applying it to the arms of the IF_THEN_ELSE. This often simplifies + things. Check for cases where both arms are testing the same +@@ -5562,51 +5655,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest, + } + } + +- /* Try to fold this expression in case we have constants that weren't +- present before. 
*/ +- temp = 0; +- switch (GET_RTX_CLASS (code)) +- { +- case RTX_UNARY: +- if (op0_mode == VOIDmode) +- op0_mode = GET_MODE (XEXP (x, 0)); +- temp = simplify_unary_operation (code, mode, XEXP (x, 0), op0_mode); +- break; +- case RTX_COMPARE: +- case RTX_COMM_COMPARE: +- { +- machine_mode cmp_mode = GET_MODE (XEXP (x, 0)); +- if (cmp_mode == VOIDmode) +- { +- cmp_mode = GET_MODE (XEXP (x, 1)); +- if (cmp_mode == VOIDmode) +- cmp_mode = op0_mode; +- } +- temp = simplify_relational_operation (code, mode, cmp_mode, +- XEXP (x, 0), XEXP (x, 1)); +- } +- break; +- case RTX_COMM_ARITH: +- case RTX_BIN_ARITH: +- temp = simplify_binary_operation (code, mode, XEXP (x, 0), XEXP (x, 1)); +- break; +- case RTX_BITFIELD_OPS: +- case RTX_TERNARY: +- temp = simplify_ternary_operation (code, mode, op0_mode, XEXP (x, 0), +- XEXP (x, 1), XEXP (x, 2)); +- break; +- default: +- break; +- } +- +- if (temp) +- { +- x = temp; +- code = GET_CODE (temp); +- op0_mode = VOIDmode; +- mode = GET_MODE (temp); +- } +- + /* First see if we can apply the inverse distributive law. */ + if (code == PLUS || code == MINUS + || code == AND || code == IOR || code == XOR) @@ -7723,9 +7771,8 @@ extract_left_shift (rtx x, int count) We try, as much as possible, to re-use rtl expressions to save memory. @@ -280,6 +523,15 @@ } --- a/src/gcc/config.gcc +++ b/src/gcc/config.gcc +@@ -302,7 +302,7 @@ m32c*-*-*) + aarch64*-*-*) + cpu_type=aarch64 + extra_headers="arm_neon.h arm_acle.h" +- extra_objs="aarch64-builtins.o aarch-common.o" ++ extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o" + target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" + target_has_targetm_common=yes + ;; @@ -575,7 +575,7 @@ case ${target} in esac @@ -299,6 +551,15 @@ *) tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC" ;; +@@ -3484,7 +3487,7 @@ case "${target}" in + + eval "val=\$with_$which" + base_val=`echo $val | sed -e 's/\+.*//'` +- ext_val=`echo $val | sed -e 's/[a-z0-9\-]\+//'` ++ ext_val=`echo $val | sed -e 's/[a-z0-9.-]\+//'` + + if [ $which = arch ]; then + def=aarch64-arches.def --- a/src/gcc/config.host +++ b/src/gcc/config.host @@ -99,6 +99,14 @@ case ${host} in @@ -316,6 +577,106 @@ arm*-*-freebsd* | arm*-*-linux*) case ${target} in arm*-*-*) +--- a/src/gcc/config/aarch64/aarch64-arches.def ++++ b/src/gcc/config/aarch64/aarch64-arches.def +@@ -27,3 +27,4 @@ + the flags implied by the architecture. */ + + AARCH64_ARCH("armv8-a", generic, 8, AARCH64_FL_FOR_ARCH8) ++AARCH64_ARCH("armv8.1-a", generic, 8, AARCH64_FL_FOR_ARCH8_1) +--- a/src/gcc/config/aarch64/aarch64-builtins.c ++++ b/src/gcc/config/aarch64/aarch64-builtins.c +@@ -133,7 +133,9 @@ enum aarch64_type_qualifiers + /* Polynomial types. */ + qualifier_poly = 0x100, + /* Lane indices - must be in range, and flipped for bigendian. */ +- qualifier_lane_index = 0x200 ++ qualifier_lane_index = 0x200, ++ /* Lane indices for single lane structure loads and stores. 
*/ ++ qualifier_struct_load_store_lane_index = 0x400 + }; + + typedef struct +@@ -235,7 +237,7 @@ aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + static enum aarch64_type_qualifiers + aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_const_pointer_map_mode, +- qualifier_none, qualifier_none }; ++ qualifier_none, qualifier_struct_load_store_lane_index }; + #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers) + + static enum aarch64_type_qualifiers +@@ -267,7 +269,7 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + static enum aarch64_type_qualifiers + aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, +- qualifier_none, qualifier_none }; ++ qualifier_none, qualifier_struct_load_store_lane_index }; + #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) + + #define CF0(N, X) CODE_FOR_aarch64_##N##X +@@ -883,12 +885,14 @@ typedef enum + SIMD_ARG_COPY_TO_REG, + SIMD_ARG_CONSTANT, + SIMD_ARG_LANE_INDEX, ++ SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, + SIMD_ARG_STOP + } builtin_simd_arg; + + static rtx + aarch64_simd_expand_args (rtx target, int icode, int have_retval, +- tree exp, builtin_simd_arg *args) ++ tree exp, builtin_simd_arg *args, ++ enum machine_mode builtin_mode) + { + rtx pat; + rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */ +@@ -927,6 +931,19 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, + op[opc] = copy_to_mode_reg (mode, op[opc]); + break; + ++ case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX: ++ gcc_assert (opc > 1); ++ if (CONST_INT_P (op[opc])) ++ { ++ aarch64_simd_lane_bounds (op[opc], 0, ++ GET_MODE_NUNITS (builtin_mode), ++ exp); ++ /* Keep to GCC-vector-extension lane indices in the RTL. */ ++ op[opc] = ++ GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc]))); ++ } ++ goto constant_arg; ++ + case SIMD_ARG_LANE_INDEX: + /* Must be a previous operand into which this is an index. */ + gcc_assert (opc > 0); +@@ -941,6 +958,7 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, + /* Fall through - if the lane index isn't a constant then + the next case will error. */ + case SIMD_ARG_CONSTANT: ++constant_arg: + if (!(*insn_data[icode].operand[opc].predicate) + (op[opc], mode)) + { +@@ -1049,6 +1067,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) + + if (d->qualifiers[qualifiers_k] & qualifier_lane_index) + args[k] = SIMD_ARG_LANE_INDEX; ++ else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) ++ args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = SIMD_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) +@@ -1072,7 +1092,7 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) + /* The interface to aarch64_simd_expand_args expects a 0 if + the function is void, and a 1 if it is not. 
*/ + return aarch64_simd_expand_args +- (target, icode, !is_void, exp, &args[1]); ++ (target, icode, !is_void, exp, &args[1], d->mode); + } + + rtx --- a/src/gcc/config/aarch64/aarch64-cores.def +++ b/src/gcc/config/aarch64/aarch64-cores.def @@ -21,7 +21,7 @@ @@ -349,8 +710,8 @@ -AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1) +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") -+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08") -+AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x53", "0x001") ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08") ++AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001") +AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") +AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") @@ -359,7 +720,7 @@ -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") -+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08.0xd03") ++AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03") --- a/src/gcc/config/aarch64/aarch64-cost-tables.h +++ b/src/gcc/config/aarch64/aarch64-cost-tables.h @@ -83,7 +83,9 @@ const struct cpu_cost_table thunderx_extra_costs = @@ -385,6 +746,47 @@ #ifdef HAVE_AS_MABI_OPTION #define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}" +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/aarch64-fusion-pairs.def +@@ -0,0 +1,38 @@ ++/* Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++/* Pairs of instructions which can be fused. before including this file, ++ define a macro: ++ ++ AARCH64_FUSION_PAIR (name, internal_name, index_bit) ++ ++ Where: ++ ++ NAME is a string giving a friendly name for the instructions to fuse. ++ INTERNAL_NAME gives the internal name suitable for appending to ++ AARCH64_FUSE_ to give an enum name. ++ INDEX_BIT is the bit to set in the bitmask of supported fusion ++ operations. 
*/ ++ ++AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK, 0) ++AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD, 1) ++AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK, 2) ++AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR, 3) ++AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH, 4) ++ --- a/src/gcc/config/aarch64/aarch64-linux.h +++ b/src/gcc/config/aarch64/aarch64-linux.h @@ -23,6 +23,9 @@ @@ -399,7 +801,7 @@ --- a/src/gcc/config/aarch64/aarch64-option-extensions.def +++ b/src/gcc/config/aarch64/aarch64-option-extensions.def -@@ -21,18 +21,25 @@ +@@ -21,18 +21,29 @@ Before using #include to read this file, define a macro: @@ -427,10 +829,14 @@ -AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) -AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) -AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) -+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp") -+AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd") -+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2") ++AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp") ++AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd") ++AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2") +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32") ++AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, AARCH64_FL_LSE, "lse") ++AARCH64_OPT_EXTENSION("pan", AARCH64_FL_PAN, AARCH64_FL_PAN, "pan") ++AARCH64_OPT_EXTENSION("lor", AARCH64_FL_LOR, AARCH64_FL_LOR, "lor") ++AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA | AARCH64_FL_FPSIMD, AARCH64_FL_RDMA, "rdma") --- a/src/gcc/config/aarch64/aarch64-opts.h +++ b/src/gcc/config/aarch64/aarch64-opts.h @@ -25,7 +25,7 @@ @@ -444,7 +850,7 @@ #undef AARCH64_CORE --- a/src/gcc/config/aarch64/aarch64-protos.h +++ b/src/gcc/config/aarch64/aarch64-protos.h -@@ -162,12 +162,20 @@ struct cpu_vector_cost +@@ -162,26 +162,78 @@ struct cpu_vector_cost const int cond_not_taken_branch_cost; /* Cost of not taken branch. 
*/ }; @@ -457,21 +863,77 @@ + struct tune_params { - const struct cpu_cost_table *const insn_extra_cost; - const struct cpu_addrcost_table *const addr_cost; - const struct cpu_regmove_cost *const regmove_cost; - const struct cpu_vector_cost *const vec_costs; -+ const struct cpu_branch_cost *const branch_costs; - const int memmov_cost; - const int issue_rate; - const unsigned int fuseable_ops; -@@ -177,11 +185,14 @@ struct tune_params - const int int_reassoc_width; - const int fp_reassoc_width; - const int vec_reassoc_width; -+ const int min_div_recip_mul_sf; -+ const int min_div_recip_mul_df; +- const struct cpu_cost_table *const insn_extra_cost; +- const struct cpu_addrcost_table *const addr_cost; +- const struct cpu_regmove_cost *const regmove_cost; +- const struct cpu_vector_cost *const vec_costs; +- const int memmov_cost; +- const int issue_rate; +- const unsigned int fuseable_ops; +- const int function_align; +- const int jump_align; +- const int loop_align; +- const int int_reassoc_width; +- const int fp_reassoc_width; +- const int vec_reassoc_width; ++ const struct cpu_cost_table *insn_extra_cost; ++ const struct cpu_addrcost_table *addr_cost; ++ const struct cpu_regmove_cost *regmove_cost; ++ const struct cpu_vector_cost *vec_costs; ++ const struct cpu_branch_cost *branch_costs; ++ int memmov_cost; ++ int issue_rate; ++ unsigned int fusible_ops; ++ int function_align; ++ int jump_align; ++ int loop_align; ++ int int_reassoc_width; ++ int fp_reassoc_width; ++ int vec_reassoc_width; ++ int min_div_recip_mul_sf; ++ int min_div_recip_mul_df; ++ unsigned int extra_tuning_flags; ++}; ++ ++#define AARCH64_FUSION_PAIR(x, name, index) \ ++ AARCH64_FUSE_##name = (1 << index), ++/* Supported fusion operations. */ ++enum aarch64_fusion_pairs ++{ ++ AARCH64_FUSE_NOTHING = 0, ++#include "aarch64-fusion-pairs.def" ++ ++/* Hacky macro to build AARCH64_FUSE_ALL. The sequence below expands ++ to: ++ AARCH64_FUSE_ALL = 0 | AARCH64_FUSE_index1 | AARCH64_FUSE_index2 ... */ ++#undef AARCH64_FUSION_PAIR ++#define AARCH64_FUSION_PAIR(x, name, y) \ ++ | AARCH64_FUSE_##name ++ ++ AARCH64_FUSE_ALL = 0 ++#include "aarch64-fusion-pairs.def" }; ++#undef AARCH64_FUSION_PAIR ++ ++#define AARCH64_EXTRA_TUNING_OPTION(x, name, index) \ ++ AARCH64_EXTRA_TUNE_##name = (1 << index), ++/* Supported tuning flags. */ ++enum aarch64_extra_tuning_flags ++{ ++ AARCH64_EXTRA_TUNE_NONE = 0, ++#include "aarch64-tuning-flags.def" ++ ++/* Hacky macro to build the "all" flag mask. ++ Expands to 0 | AARCH64_TUNE_index0 | AARCH64_TUNE_index1 , etc. 
*/ ++#undef AARCH64_EXTRA_TUNING_OPTION ++#define AARCH64_EXTRA_TUNING_OPTION(x, name, y) \ ++ | AARCH64_EXTRA_TUNE_##name ++ AARCH64_EXTRA_TUNE_ALL = 0 ++#include "aarch64-tuning-flags.def" ++}; ++#undef AARCH64_EXTRA_TUNING_OPTION ++ ++extern struct tune_params aarch64_tune_params; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); int aarch64_get_condition_code (rtx); @@ -480,7 +942,15 @@ enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); -@@ -264,12 +275,6 @@ void init_aarch64_simd_builtins (void); +@@ -248,6 +300,7 @@ unsigned aarch64_dbx_register_number (unsigned); + unsigned aarch64_trampoline_size (void); + void aarch64_asm_output_labelref (FILE *, const char *); + void aarch64_elf_asm_named_section (const char *, unsigned, tree); ++void aarch64_err_no_fpadvsimd (machine_mode, const char *); + void aarch64_expand_epilogue (bool); + void aarch64_expand_mov_immediate (rtx, rtx); + void aarch64_expand_prologue (void); +@@ -264,12 +317,6 @@ void init_aarch64_simd_builtins (void); void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int); @@ -493,6 +963,45 @@ /* Expand builtins for SIMD intrinsics. */ rtx aarch64_simd_expand_builtin (int, tree, rtx); +@@ -295,6 +342,10 @@ rtx aarch64_load_tp (rtx); + + void aarch64_expand_compare_and_swap (rtx op[]); + void aarch64_split_compare_and_swap (rtx op[]); ++void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx); ++ ++bool aarch64_atomic_ldop_supported_p (enum rtx_code); ++void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx); + void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); + + bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE); +--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def ++++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def +@@ -88,9 +88,9 @@ + BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0) + BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0) + /* Implemented by aarch64_ld_lane. */ +- BUILTIN_VQ (LOADSTRUCT_LANE, ld2_lane, 0) +- BUILTIN_VQ (LOADSTRUCT_LANE, ld3_lane, 0) +- BUILTIN_VQ (LOADSTRUCT_LANE, ld4_lane, 0) ++ BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld2_lane, 0) ++ BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld3_lane, 0) ++ BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld4_lane, 0) + /* Implemented by aarch64_st. */ + BUILTIN_VDC (STORESTRUCT, st2, 0) + BUILTIN_VDC (STORESTRUCT, st3, 0) +@@ -100,9 +100,9 @@ + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) + +- BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0) +- BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0) +- BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0) ++ BUILTIN_VALLDIF (STORESTRUCT_LANE, st2_lane, 0) ++ BUILTIN_VALLDIF (STORESTRUCT_LANE, st3_lane, 0) ++ BUILTIN_VALLDIF (STORESTRUCT_LANE, st4_lane, 0) + + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) --- a/src/gcc/config/aarch64/aarch64-simd.md +++ b/src/gcc/config/aarch64/aarch64-simd.md @@ -2057,13 +2057,13 @@ @@ -555,7 +1064,23 @@ "TARGET_SIMD" { emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], -@@ -3955,6 +3955,7 @@ +@@ -3919,10 +3919,13 @@ + (unspec:OI [(match_operand: 1 "aarch64_simd_struct_operand" "Utv") + (match_operand:OI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] + UNSPEC_LD2_LANE))] + "TARGET_SIMD" +- "ld2\\t{%S0. 
- %T0.}[%3], %1" ++ { ++ operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); ++ return "ld2\\t{%S0. - %T0.}[%3], %1"; ++ } + [(set_attr "type" "neon_load2_one_lane")] + ) + +@@ -3955,15 +3958,19 @@ [(set_attr "type" "neon_store2_2reg")] ) @@ -563,19 +1088,39 @@ (define_insn "vec_store_lanesoi_lane" [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") (unspec: [(match_operand:OI 1 "register_operand" "w") -@@ -3962,7 +3963,10 @@ +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_ST2_LANE))] +- UNSPEC_ST2_LANE))] ++ UNSPEC_ST2_LANE))] "TARGET_SIMD" - "st2\\t{%S1. - %T1.}[%2], %0" +- [(set_attr "type" "neon_store3_one_lane")] + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "st2\\t{%S1. - %T1.}[%2], %0"; + } - [(set_attr "type" "neon_store3_one_lane")] ++ [(set_attr "type" "neon_store2_one_lane")] + ) + + (define_expand "vec_store_lanesoi" +@@ -4010,10 +4017,13 @@ + (unspec:CI [(match_operand: 1 "aarch64_simd_struct_operand" "Utv") + (match_operand:CI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD3_LANE))] + "TARGET_SIMD" +- "ld3\\t{%S0. - %U0.}[%3], %1" ++{ ++ operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); ++ return "ld3\\t{%S0. - %U0.}[%3], %1"; ++} + [(set_attr "type" "neon_load3_one_lane")] ) -@@ -4046,6 +4050,7 @@ +@@ -4046,14 +4056,18 @@ [(set_attr "type" "neon_store3_3reg")] ) @@ -583,9 +1128,11 @@ (define_insn "vec_store_lanesci_lane" [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") (unspec: [(match_operand:CI 1 "register_operand" "w") -@@ -4053,7 +4058,10 @@ +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_ST3_LANE))] +- UNSPEC_ST3_LANE))] ++ UNSPEC_ST3_LANE))] "TARGET_SIMD" - "st3\\t{%S1. - %U1.}[%2], %0" + { @@ -595,7 +1142,23 @@ [(set_attr "type" "neon_store3_one_lane")] ) -@@ -4137,6 +4145,7 @@ +@@ -4101,10 +4115,13 @@ + (unspec:XI [(match_operand: 1 "aarch64_simd_struct_operand" "Utv") + (match_operand:XI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD4_LANE))] + "TARGET_SIMD" +- "ld4\\t{%S0. - %V0.}[%3], %1" ++{ ++ operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); ++ return "ld4\\t{%S0. - %V0.}[%3], %1"; ++} + [(set_attr "type" "neon_load4_one_lane")] + ) + +@@ -4137,14 +4154,18 @@ [(set_attr "type" "neon_store4_4reg")] ) @@ -603,9 +1166,11 @@ (define_insn "vec_store_lanesxi_lane" [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") (unspec: [(match_operand:XI 1 "register_operand" "w") -@@ -4144,7 +4153,10 @@ +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_ST4_LANE))] +- UNSPEC_ST4_LANE))] ++ UNSPEC_ST4_LANE))] "TARGET_SIMD" - "st4\\t{%S1. 
- %V1.}[%2], %0" + { @@ -615,41 +1180,253 @@ [(set_attr "type" "neon_store4_one_lane")] ) +@@ -4554,14 +4575,12 @@ + (match_operand:DI 1 "register_operand" "w") + (match_operand:OI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + +- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), +- NULL); + emit_insn (gen_aarch64_vec_load_lanesoi_lane (operands[0], + mem, + operands[2], +@@ -4574,14 +4593,12 @@ + (match_operand:DI 1 "register_operand" "w") + (match_operand:CI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + +- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), +- NULL); + emit_insn (gen_aarch64_vec_load_lanesci_lane (operands[0], + mem, + operands[2], +@@ -4594,14 +4611,12 @@ + (match_operand:DI 1 "register_operand" "w") + (match_operand:XI 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + +- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), +- NULL); + emit_insn (gen_aarch64_vec_load_lanesxi_lane (operands[0], + mem, + operands[2], +@@ -4838,54 +4853,45 @@ + DONE; + }) + +-(define_expand "aarch64_st2_lane" ++(define_expand "aarch64_st2_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:OI 1 "register_operand" "w") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); +- operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + +- emit_insn (gen_vec_store_lanesoi_lane (mem, +- operands[1], +- operands[2])); ++ emit_insn (gen_vec_store_lanesoi_lane (mem, operands[1], operands[2])); + DONE; + }) + +-(define_expand "aarch64_st3_lane" ++(define_expand "aarch64_st3_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:CI 1 "register_operand" "w") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); +- operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + +- emit_insn (gen_vec_store_lanesci_lane (mem, +- operands[1], +- operands[2])); ++ emit_insn (gen_vec_store_lanesci_lane (mem, operands[1], operands[2])); + DONE; + }) + +-(define_expand "aarch64_st4_lane" ++(define_expand "aarch64_st4_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:XI 1 "register_operand" "w") +- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); +- operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + +- emit_insn (gen_vec_store_lanesxi_lane (mem, 
+- operands[1], +- operands[2])); ++ emit_insn (gen_vec_store_lanesxi_lane (mem, operands[1], operands[2])); + DONE; + }) + +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/aarch64-tuning-flags.def +@@ -0,0 +1,34 @@ ++/* Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++/* Additional control over certain tuning parameters. Before including ++ this file, define a macro: ++ ++ AARCH64_EXTRA_TUNING_OPTION (name, internal_name, index_bit) ++ ++ Where: ++ ++ NAME is a string giving a friendly name for the tuning flag. ++ INTERNAL_NAME gives the internal name suitable for appending to ++ AARCH64_TUNE_ to give an enum name. ++ INDEX_BIT is the bit to set in the bitmask of supported tuning ++ flags. */ ++ ++AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS, 0) ++ --- a/src/gcc/config/aarch64/aarch64.c +++ b/src/gcc/config/aarch64/aarch64.c -@@ -339,12 +339,20 @@ static const struct cpu_vector_cost xgene1_vector_cost = - #define AARCH64_FUSE_ADRP_LDR (1 << 3) - #define AARCH64_FUSE_CMP_BRANCH (1 << 4) +@@ -95,6 +95,7 @@ + #include "rtl-iter.h" + #include "tm-constrs.h" + #include "sched-int.h" ++#include "cortex-a57-fma-steering.h" + + /* Defined for convenience. */ + #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) +@@ -177,15 +178,42 @@ unsigned aarch64_architecture_version; + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; + +-/* The current tuning set. */ +-const struct tune_params *aarch64_tune_params; +- + /* Mask to specify which instructions we are allowed to generate. */ + unsigned long aarch64_isa_flags = 0; + + /* Mask to specify which instruction scheduling options should be used. */ + unsigned long aarch64_tune_flags = 0; + ++/* Support for command line parsing of boolean flags in the tuning ++ structures. */ ++struct aarch64_flag_desc ++{ ++ const char* name; ++ unsigned int flag; ++}; ++ ++#define AARCH64_FUSION_PAIR(name, internal_name, y) \ ++ { name, AARCH64_FUSE_##internal_name }, ++static const struct aarch64_flag_desc aarch64_fusible_pairs[] = ++{ ++ { "none", AARCH64_FUSE_NOTHING }, ++#include "aarch64-fusion-pairs.def" ++ { "all", AARCH64_FUSE_ALL }, ++ { NULL, AARCH64_FUSE_NOTHING } ++}; ++#undef AARCH64_FUION_PAIR ++ ++#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \ ++ { name, AARCH64_EXTRA_TUNE_##internal_name }, ++static const struct aarch64_flag_desc aarch64_tuning_flags[] = ++{ ++ { "none", AARCH64_EXTRA_TUNE_NONE }, ++#include "aarch64-tuning-flags.def" ++ { "all", AARCH64_EXTRA_TUNE_ALL }, ++ { NULL, AARCH64_EXTRA_TUNE_NONE } ++}; ++#undef AARCH64_EXTRA_TUNING_OPTION ++ + /* Tuning parameters. 
*/ + + static const struct cpu_addrcost_table generic_addrcost_table = +@@ -332,12 +360,12 @@ static const struct cpu_vector_cost xgene1_vector_cost = + 1 /* cond_not_taken_branch_cost */ + }; +-#define AARCH64_FUSE_NOTHING (0) +-#define AARCH64_FUSE_MOV_MOVK (1 << 0) +-#define AARCH64_FUSE_ADRP_ADD (1 << 1) +-#define AARCH64_FUSE_MOVK_MOVK (1 << 2) +-#define AARCH64_FUSE_ADRP_LDR (1 << 3) +-#define AARCH64_FUSE_CMP_BRANCH (1 << 4) +/* Generic costs for branch instructions. */ +static const struct cpu_branch_cost generic_branch_cost = +{ + 2, /* Predictable. */ + 2 /* Unpredictable. */ +}; -+ + static const struct tune_params generic_tunings = { - &cortexa57_extra_costs, +@@ -345,15 +373,19 @@ static const struct tune_params generic_tunings = &generic_addrcost_table, &generic_regmove_cost, &generic_vector_cost, + &generic_branch_cost, 4, /* memmov_cost */ 2, /* issue_rate */ - AARCH64_FUSE_NOTHING, /* fuseable_ops */ -@@ -353,7 +361,9 @@ static const struct tune_params generic_tunings = +- AARCH64_FUSE_NOTHING, /* fuseable_ops */ ++ AARCH64_FUSE_NOTHING, /* fusible_ops */ + 8, /* function_align. */ + 8, /* jump_align. */ 4, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params cortexa53_tunings = -@@ -362,6 +372,7 @@ static const struct tune_params cortexa53_tunings = +@@ -362,16 +394,20 @@ static const struct tune_params cortexa53_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, @@ -657,75 +1434,129 @@ 4, /* memmov_cost */ 2, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD -@@ -371,7 +382,9 @@ static const struct tune_params cortexa53_tunings = +- | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops */ ++ | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ + 8, /* function_align. */ + 8, /* jump_align. */ 4, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params cortexa57_tunings = -@@ -380,6 +393,7 @@ static const struct tune_params cortexa57_tunings = +@@ -380,16 +416,42 @@ static const struct tune_params cortexa57_tunings = &cortexa57_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, + &generic_branch_cost, ++ 4, /* memmov_cost */ ++ 3, /* issue_rate */ ++ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD ++ | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ ++ 16, /* function_align. */ ++ 8, /* jump_align. */ ++ 4, /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. 
*/ ++}; ++ ++static const struct tune_params cortexa72_tunings = ++{ ++ &cortexa57_extra_costs, ++ &cortexa57_addrcost_table, ++ &cortexa57_regmove_cost, ++ &cortexa57_vector_cost, ++ &generic_branch_cost, 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD -@@ -389,7 +403,9 @@ static const struct tune_params cortexa57_tunings = +- | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */ ++ | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ + 16, /* function_align. */ + 8, /* jump_align. */ 4, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params thunderx_tunings = -@@ -398,6 +414,7 @@ static const struct tune_params thunderx_tunings = +@@ -398,15 +460,19 @@ static const struct tune_params thunderx_tunings = &generic_addrcost_table, &thunderx_regmove_cost, &generic_vector_cost, + &generic_branch_cost, 6, /* memmov_cost */ 2, /* issue_rate */ - AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */ -@@ -406,7 +423,9 @@ static const struct tune_params thunderx_tunings = +- AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */ ++ AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ + 8, /* function_align. */ + 8, /* jump_align. */ 8, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params xgene1_tunings = -@@ -415,6 +434,7 @@ static const struct tune_params xgene1_tunings = +@@ -415,15 +481,37 @@ static const struct tune_params xgene1_tunings = &xgene1_addrcost_table, &xgene1_regmove_cost, &xgene1_vector_cost, + &generic_branch_cost, 6, /* memmov_cost */ 4, /* issue_rate */ - AARCH64_FUSE_NOTHING, /* fuseable_ops */ -@@ -423,7 +443,9 @@ static const struct tune_params xgene1_tunings = +- AARCH64_FUSE_NOTHING, /* fuseable_ops */ ++ AARCH64_FUSE_NOTHING, /* fusible_ops */ + 16, /* function_align. */ + 8, /* jump_align. */ 16, /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ - 1 /* vec_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ -+ 2 /* min_div_recip_mul_df. */ ++ 2, /* min_div_recip_mul_df. */ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ ++}; ++ ++/* Support for fine-grained override of the tuning structures. */ ++struct aarch64_tuning_override_function ++{ ++ const char* name; ++ void (*parse_override)(const char*, struct tune_params*); ++}; ++ ++static void aarch64_parse_fuse_string (const char*, struct tune_params*); ++static void aarch64_parse_tune_string (const char*, struct tune_params*); ++ ++static const struct aarch64_tuning_override_function ++aarch64_tuning_override_functions[] = ++{ ++ { "fuse", aarch64_parse_fuse_string }, ++ { "tune", aarch64_parse_tune_string }, ++ { NULL, NULL } }; /* A processor implementing AArch64. */ -@@ -440,7 +462,7 @@ struct processor +@@ -440,7 +528,7 @@ struct processor /* Processor cores implementing AArch64. 
*/ static const struct processor all_cores[] = { @@ -734,7 +1565,17 @@ {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings}, #include "aarch64-cores.def" #undef AARCH64_CORE -@@ -477,7 +499,7 @@ struct aarch64_option_extension +@@ -464,6 +552,9 @@ static const struct processor *selected_arch; + static const struct processor *selected_cpu; + static const struct processor *selected_tune; + ++/* The current tuning set. */ ++struct tune_params aarch64_tune_params = generic_tunings; ++ + #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) + + /* An ISA extension in the co-processor and main instruction set space. */ +@@ -477,7 +568,7 @@ struct aarch64_option_extension /* ISA extensions in AArch64. */ static const struct aarch64_option_extension all_extensions[] = { @@ -743,21 +1584,75 @@ {NAME, FLAGS_ON, FLAGS_OFF}, #include "aarch64-option-extensions.def" #undef AARCH64_OPT_EXTENSION -@@ -512,9 +534,11 @@ static const char * const aarch64_condition_codes[] = +@@ -511,10 +602,22 @@ static const char * const aarch64_condition_codes[] = + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" }; ++void ++aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg) ++{ ++ const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector"; ++ if (TARGET_GENERAL_REGS_ONLY) ++ error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg); ++ else ++ error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg); ++} ++ static unsigned int -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED) +aarch64_min_divisions_for_recip_mul (enum machine_mode mode) { - return 2; + if (GET_MODE_UNIT_SIZE (mode) == 4) -+ return aarch64_tune_params->min_div_recip_mul_sf; -+ return aarch64_tune_params->min_div_recip_mul_df; ++ return aarch64_tune_params.min_div_recip_mul_sf; ++ return aarch64_tune_params.min_div_recip_mul_df; } static int -@@ -4901,8 +4925,9 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) +@@ -522,11 +625,11 @@ aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED, + enum machine_mode mode) + { + if (VECTOR_MODE_P (mode)) +- return aarch64_tune_params->vec_reassoc_width; ++ return aarch64_tune_params.vec_reassoc_width; + if (INTEGRAL_MODE_P (mode)) +- return aarch64_tune_params->int_reassoc_width; ++ return aarch64_tune_params.int_reassoc_width; + if (FLOAT_MODE_P (mode)) +- return aarch64_tune_params->fp_reassoc_width; ++ return aarch64_tune_params.fp_reassoc_width; + return 1; + } + +@@ -1763,6 +1866,9 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, + and homogenous short-vector aggregates (HVA). */ + if (allocate_nvrn) + { ++ if (!TARGET_FLOAT) ++ aarch64_err_no_fpadvsimd (mode, "argument"); ++ + if (nvrn + nregs <= NUM_FP_ARG_REGS) + { + pcum->aapcs_nextnvrn = nvrn + nregs; +@@ -1889,6 +1995,17 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + pcum->aapcs_stack_words = 0; + pcum->aapcs_stack_size = 0; + ++ if (!TARGET_FLOAT ++ && fndecl && TREE_PUBLIC (fndecl) ++ && fntype && fntype != error_mark_node) ++ { ++ const_tree type = TREE_TYPE (fntype); ++ machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */ ++ int nregs ATTRIBUTE_UNUSED; /* Likewise. 
*/ ++ if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type, ++ &mode, &nregs, NULL)) ++ aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type"); ++ } + return; + } + +@@ -4901,8 +5018,9 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) case FP_REGS: case FP_LO_REGS: return @@ -769,7 +1664,7 @@ case STACK_REG: return 1; -@@ -5157,9 +5182,18 @@ aarch64_strip_extend (rtx x) +@@ -5157,9 +5275,18 @@ aarch64_strip_extend (rtx x) return x; } @@ -790,16 +1685,19 @@ operands where needed. */ static int -@@ -5169,7 +5203,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5167,9 +5294,9 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) + { + rtx op0, op1; const struct cpu_cost_table *extra_cost - = aarch64_tune_params->insn_extra_cost; +- = aarch64_tune_params->insn_extra_cost; ++ = aarch64_tune_params.insn_extra_cost; int cost = 0; - bool maybe_fma = (outer == PLUS || outer == MINUS); + bool compound_p = (outer == PLUS || outer == MINUS); machine_mode mode = GET_MODE (x); gcc_checking_assert (code == MULT); -@@ -5184,24 +5218,50 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5184,24 +5311,50 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) if (GET_MODE_CLASS (mode) == MODE_INT) { /* The multiply will be canonicalized as a shift, cost it as such. */ @@ -856,7 +1754,7 @@ /* Integer multiplies or FMAs have zero/sign extending variants. */ if ((GET_CODE (op0) == ZERO_EXTEND && GET_CODE (op1) == ZERO_EXTEND) -@@ -5213,8 +5273,8 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5213,8 +5366,8 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) if (speed) { @@ -867,7 +1765,7 @@ cost += extra_cost->mult[0].extend_add; else /* MUL/SMULL/UMULL. */ -@@ -5224,15 +5284,15 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5224,15 +5377,15 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) return cost; } @@ -886,7 +1784,7 @@ cost += extra_cost->mult[mode == DImode].add; else /* MUL. */ -@@ -5250,7 +5310,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5250,7 +5403,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) which case FNMUL is different than FMUL with operand negation. */ bool neg0 = GET_CODE (op0) == NEG; bool neg1 = GET_CODE (op1) == NEG; @@ -895,7 +1793,7 @@ { if (neg0) op0 = XEXP (op0, 0); -@@ -5258,7 +5318,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +@@ -5258,7 +5411,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) op1 = XEXP (op1, 0); } @@ -904,7 +1802,16 @@ /* FMADD/FNMADD/FNMSUB/FMSUB. */ cost += extra_cost->fp[mode == DFmode].fma; else -@@ -5367,6 +5427,23 @@ aarch64_address_cost (rtx x, +@@ -5279,7 +5432,7 @@ aarch64_address_cost (rtx x, + bool speed) + { + enum rtx_code c = GET_CODE (x); +- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; ++ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost; + struct aarch64_address_info info; + int cost = 0; + info.shift = 0; +@@ -5367,6 +5520,23 @@ aarch64_address_cost (rtx x, return cost; } @@ -917,7 +1824,7 @@ +{ + /* When optimizing for speed, use the cost of unpredictable branches. 
*/ + const struct cpu_branch_cost *branch_costs = -+ aarch64_tune_params->branch_costs; ++ aarch64_tune_params.branch_costs; + + if (!speed_p || predictable_p) + return branch_costs->predictable; @@ -928,7 +1835,7 @@ /* Return true if the RTX X in mode MODE is a zero or sign extract usable in an ADD or SUB (extended register) instruction. */ static bool -@@ -5415,6 +5492,51 @@ aarch64_frint_unspec_p (unsigned int u) +@@ -5415,6 +5585,51 @@ aarch64_frint_unspec_p (unsigned int u) } } @@ -980,7 +1887,16 @@ /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)), storing it in *COST. Result is true if the total cost of the operation has now been calculated. */ -@@ -5505,16 +5627,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5497,7 +5712,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, + { + rtx op0, op1, op2; + const struct cpu_cost_table *extra_cost +- = aarch64_tune_params->insn_extra_cost; ++ = aarch64_tune_params.insn_extra_cost; + machine_mode mode = GET_MODE (x); + + /* By default, assume that everything has equivalent cost to the +@@ -5505,16 +5720,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, above this default. */ *cost = COSTS_N_INSNS (1); @@ -997,7 +1913,7 @@ switch (code) { case SET: -@@ -5529,7 +5641,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5529,7 +5734,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (speed) { rtx address = XEXP (op0, 0); @@ -1008,7 +1924,7 @@ *cost += extra_cost->ldst.store; else if (mode == SFmode) *cost += extra_cost->ldst.storef; -@@ -5550,15 +5664,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5550,15 +5757,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, /* Fall through. */ case REG: @@ -1037,7 +1953,7 @@ else /* Cost is just the cost of the RHS of the set. */ *cost += rtx_cost (op1, SET, 1, speed); -@@ -5656,7 +5777,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5656,7 +5870,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, approximation for the additional cost of the addressing mode. */ rtx address = XEXP (x, 0); @@ -1048,7 +1964,7 @@ *cost += extra_cost->ldst.load; else if (mode == SFmode) *cost += extra_cost->ldst.loadf; -@@ -5673,6 +5796,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5673,6 +5889,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case NEG: op0 = XEXP (x, 0); @@ -1065,7 +1981,7 @@ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) { if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -@@ -5717,7 +5850,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5717,7 +5943,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case CLRSB: case CLZ: if (speed) @@ -1079,7 +1995,7 @@ return false; -@@ -5796,12 +5934,27 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5796,12 +6027,27 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) { @@ -1107,7 +2023,7 @@ return false; case MINUS: -@@ -5810,6 +5963,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, +@@ -5810,6 +6056,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, op1 = XEXP (x, 1); cost_minus: @@ -1116,7 +2032,7 @@ /* Detect valid immediates. 
*/ if ((GET_MODE_CLASS (mode) == MODE_INT || (GET_MODE_CLASS (mode) == MODE_CC -@@ -5817,20 +5972,17 @@ cost_minus: +@@ -5817,20 +6065,17 @@ cost_minus: && CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) { @@ -1138,7 +2054,7 @@ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), (enum rtx_code) GET_CODE (op1), -@@ -5842,13 +5994,12 @@ cost_minus: +@@ -5842,13 +6087,12 @@ cost_minus: /* Cost this as an FMA-alike operation. */ if ((GET_CODE (new_op1) == MULT @@ -1153,7 +2069,7 @@ return true; } -@@ -5856,12 +6007,21 @@ cost_minus: +@@ -5856,12 +6100,21 @@ cost_minus: if (speed) { @@ -1180,7 +2096,7 @@ } return true; } -@@ -5895,11 +6055,13 @@ cost_plus: +@@ -5895,11 +6148,13 @@ cost_plus: return true; } @@ -1195,7 +2111,7 @@ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), (enum rtx_code) GET_CODE (op0), -@@ -5912,25 +6074,32 @@ cost_plus: +@@ -5912,25 +6167,32 @@ cost_plus: new_op0 = aarch64_strip_extend (op0); if (GET_CODE (new_op0) == MULT @@ -1237,7 +2153,7 @@ } return true; } -@@ -5939,8 +6108,12 @@ cost_plus: +@@ -5939,8 +6201,12 @@ cost_plus: *cost = COSTS_N_INSNS (1); if (speed) @@ -1252,7 +2168,7 @@ return false; case IOR: -@@ -5948,8 +6121,22 @@ cost_plus: +@@ -5948,8 +6214,22 @@ cost_plus: { *cost = COSTS_N_INSNS (1); @@ -1276,7 +2192,7 @@ return true; } -@@ -5960,6 +6147,13 @@ cost_plus: +@@ -5960,6 +6240,13 @@ cost_plus: op0 = XEXP (x, 0); op1 = XEXP (x, 1); @@ -1290,7 +2206,7 @@ if (code == AND && GET_CODE (op0) == MULT && CONST_INT_P (XEXP (op0, 1)) -@@ -6025,13 +6219,52 @@ cost_plus: +@@ -6025,13 +6312,52 @@ cost_plus: return false; case NOT: @@ -1346,7 +2262,7 @@ return false; case ZERO_EXTEND: -@@ -6067,10 +6300,19 @@ cost_plus: +@@ -6067,10 +6393,19 @@ cost_plus: return true; } @@ -1369,7 +2285,7 @@ return false; case SIGN_EXTEND: -@@ -6090,7 +6332,12 @@ cost_plus: +@@ -6090,7 +6425,12 @@ cost_plus: } if (speed) @@ -1383,7 +2299,7 @@ return false; case ASHIFT: -@@ -6099,10 +6346,20 @@ cost_plus: +@@ -6099,10 +6439,20 @@ cost_plus: if (CONST_INT_P (op1)) { @@ -1407,7 +2323,7 @@ /* We can incorporate zero/sign extend for free. */ if (GET_CODE (op0) == ZERO_EXTEND -@@ -6114,10 +6371,19 @@ cost_plus: +@@ -6114,10 +6464,19 @@ cost_plus: } else { @@ -1430,7 +2346,7 @@ return false; /* All arguments need to be in registers. */ } -@@ -6132,7 +6398,12 @@ cost_plus: +@@ -6132,7 +6491,12 @@ cost_plus: { /* ASR (immediate) and friends. */ if (speed) @@ -1444,7 +2360,7 @@ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); return true; -@@ -6142,8 +6413,12 @@ cost_plus: +@@ -6142,8 +6506,12 @@ cost_plus: /* ASR (register) and friends. */ if (speed) @@ -1459,7 +2375,7 @@ return false; /* All arguments need to be in registers. */ } -@@ -6191,7 +6466,12 @@ cost_plus: +@@ -6191,7 +6559,12 @@ cost_plus: case SIGN_EXTRACT: /* UBFX/SBFX. */ if (speed) @@ -1473,7 +2389,7 @@ /* We can trust that the immediates used will be correct (there are no by-register forms), so we need only cost op0. */ -@@ -6208,7 +6488,9 @@ cost_plus: +@@ -6208,7 +6581,9 @@ cost_plus: case UMOD: if (speed) { @@ -1484,7 +2400,7 @@ *cost += (extra_cost->mult[GET_MODE (x) == DImode].add + extra_cost->mult[GET_MODE (x) == DImode].idiv); else if (GET_MODE (x) == DFmode) -@@ -6225,7 +6507,9 @@ cost_plus: +@@ -6225,7 +6600,9 @@ cost_plus: case SQRT: if (speed) { @@ -1495,7 +2411,7 @@ /* There is no integer SQRT, so only DIV and UDIV can get here. 
*/ *cost += extra_cost->mult[mode == DImode].idiv; -@@ -6257,7 +6541,12 @@ cost_plus: +@@ -6257,7 +6634,12 @@ cost_plus: op2 = XEXP (x, 2); if (speed) @@ -1509,7 +2425,7 @@ /* FMSUB, FNMADD, and FNMSUB are free. */ if (GET_CODE (op0) == NEG) -@@ -6295,14 +6584,36 @@ cost_plus: +@@ -6295,14 +6677,36 @@ cost_plus: *cost += rtx_cost (op2, FMA, 2, speed); return true; @@ -1548,7 +2464,7 @@ return false; case FIX: -@@ -6323,15 +6634,37 @@ cost_plus: +@@ -6323,15 +6727,37 @@ cost_plus: } if (speed) @@ -1566,14 +2482,14 @@ case ABS: - if (GET_MODE_CLASS (mode) == MODE_FLOAT) + if (VECTOR_MODE_P (mode)) - { -- /* FABS and FNEG are analogous. */ ++ { + /* ABS (vector). */ + if (speed) + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ { + { +- /* FABS and FNEG are analogous. */ + op0 = XEXP (x, 0); + + /* FABD, which is analogous to FADD. */ @@ -1590,7 +2506,7 @@ if (speed) *cost += extra_cost->fp[mode == DFmode].neg; } -@@ -6350,10 +6683,15 @@ cost_plus: +@@ -6350,10 +6776,15 @@ cost_plus: case SMIN: if (speed) { @@ -1610,44 +2526,406 @@ } return false; -@@ -7830,6 +8168,26 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) - return -1; +@@ -6447,7 +6878,7 @@ aarch64_register_move_cost (machine_mode mode, + enum reg_class from = (enum reg_class) from_i; + enum reg_class to = (enum reg_class) to_i; + const struct cpu_regmove_cost *regmove_cost +- = aarch64_tune_params->regmove_cost; ++ = aarch64_tune_params.regmove_cost; + + /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ + if (to == CALLER_SAVE_REGS || to == POINTER_REGS) +@@ -6502,14 +6933,14 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) + { +- return aarch64_tune_params->memmov_cost; ++ return aarch64_tune_params.memmov_cost; } -+/* Return TRUE if the type, as described by TYPE and MODE, is a short vector -+ type as described in AAPCS64 \S 4.1.2. + /* Return the number of instructions that can be issued per cycle. 
*/ + static int + aarch64_sched_issue_rate (void) + { +- return aarch64_tune_params->issue_rate; ++ return aarch64_tune_params.issue_rate; + } + + static int +@@ -6533,44 +6964,44 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + switch (type_of_cost) + { + case scalar_stmt: +- return aarch64_tune_params->vec_costs->scalar_stmt_cost; ++ return aarch64_tune_params.vec_costs->scalar_stmt_cost; + + case scalar_load: +- return aarch64_tune_params->vec_costs->scalar_load_cost; ++ return aarch64_tune_params.vec_costs->scalar_load_cost; + + case scalar_store: +- return aarch64_tune_params->vec_costs->scalar_store_cost; ++ return aarch64_tune_params.vec_costs->scalar_store_cost; + + case vector_stmt: +- return aarch64_tune_params->vec_costs->vec_stmt_cost; ++ return aarch64_tune_params.vec_costs->vec_stmt_cost; + + case vector_load: +- return aarch64_tune_params->vec_costs->vec_align_load_cost; ++ return aarch64_tune_params.vec_costs->vec_align_load_cost; + + case vector_store: +- return aarch64_tune_params->vec_costs->vec_store_cost; ++ return aarch64_tune_params.vec_costs->vec_store_cost; + + case vec_to_scalar: +- return aarch64_tune_params->vec_costs->vec_to_scalar_cost; ++ return aarch64_tune_params.vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: +- return aarch64_tune_params->vec_costs->scalar_to_vec_cost; ++ return aarch64_tune_params.vec_costs->scalar_to_vec_cost; + + case unaligned_load: +- return aarch64_tune_params->vec_costs->vec_unalign_load_cost; ++ return aarch64_tune_params.vec_costs->vec_unalign_load_cost; + + case unaligned_store: +- return aarch64_tune_params->vec_costs->vec_unalign_store_cost; ++ return aarch64_tune_params.vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: +- return aarch64_tune_params->vec_costs->cond_taken_branch_cost; ++ return aarch64_tune_params.vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: +- return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost; ++ return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: +- return aarch64_tune_params->vec_costs->vec_stmt_cost; ++ return aarch64_tune_params.vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); +@@ -6813,9 +7244,181 @@ aarch64_parse_tune (void) + return; + } + ++/* Parse TOKEN, which has length LENGTH to see if it is an option ++ described in FLAG. If it is, return the index bit for that fusion type. ++ If not, error (printing OPTION_NAME) and return zero. */ ++ ++static unsigned int ++aarch64_parse_one_option_token (const char *token, ++ size_t length, ++ const struct aarch64_flag_desc *flag, ++ const char *option_name) ++{ ++ for (; flag->name != NULL; flag++) ++ { ++ if (length == strlen (flag->name) ++ && !strncmp (flag->name, token, length)) ++ return flag->flag; ++ } + -+ See the comment above aarch64_composite_type_p for the notes on MODE. */ ++ error ("unknown flag passed in -moverride=%s (%s)", option_name, token); ++ return 0; ++} + -+static bool -+aarch64_short_vector_p (const_tree type, -+ machine_mode mode) ++/* Parse OPTION which is a comma-separated list of flags to enable. ++ FLAGS gives the list of flags we understand, INITIAL_STATE gives any ++ default state we inherit from the CPU tuning structures. OPTION_NAME ++ gives the top-level option we are parsing in the -moverride string, ++ for use in error messages. 
*/ ++ ++static unsigned int ++aarch64_parse_boolean_options (const char *option, ++ const struct aarch64_flag_desc *flags, ++ unsigned int initial_state, ++ const char *option_name) ++{ ++ const char separator = '.'; ++ const char* specs = option; ++ const char* ntoken = option; ++ unsigned int found_flags = initial_state; ++ ++ while ((ntoken = strchr (specs, separator))) ++ { ++ size_t token_length = ntoken - specs; ++ unsigned token_ops = aarch64_parse_one_option_token (specs, ++ token_length, ++ flags, ++ option_name); ++ /* If we find "none" (or, for simplicity's sake, an error) anywhere ++ in the token stream, reset the supported operations. So: ++ ++ adrp+add.cmp+branch.none.adrp+add ++ ++ would have the result of turning on only adrp+add fusion. */ ++ if (!token_ops) ++ found_flags = 0; ++ ++ found_flags |= token_ops; ++ specs = ++ntoken; ++ } ++ ++ /* We ended with a comma, print something. */ ++ if (!(*specs)) ++ { ++ error ("%s string ill-formed\n", option_name); ++ return 0; ++ } ++ ++ /* We still have one more token to parse. */ ++ size_t token_length = strlen (specs); ++ unsigned token_ops = aarch64_parse_one_option_token (specs, ++ token_length, ++ flags, ++ option_name); ++ if (!token_ops) ++ found_flags = 0; ++ ++ found_flags |= token_ops; ++ return found_flags; ++} ++ ++/* Support for overriding instruction fusion. */ ++ ++static void ++aarch64_parse_fuse_string (const char *fuse_string, ++ struct tune_params *tune) +{ -+ HOST_WIDE_INT size = -1; ++ tune->fusible_ops = aarch64_parse_boolean_options (fuse_string, ++ aarch64_fusible_pairs, ++ tune->fusible_ops, ++ "fuse="); ++} + -+ if (type && TREE_CODE (type) == VECTOR_TYPE) -+ size = int_size_in_bytes (type); -+ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT -+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) -+ size = GET_MODE_SIZE (mode); ++/* Support for overriding other tuning flags. */ + -+ return (size == 8 || size == 16); ++static void ++aarch64_parse_tune_string (const char *tune_string, ++ struct tune_params *tune) ++{ ++ tune->extra_tuning_flags ++ = aarch64_parse_boolean_options (tune_string, ++ aarch64_tuning_flags, ++ tune->extra_tuning_flags, ++ "tune="); +} + - /* Return TRUE if the type, as described by TYPE and MODE, is a composite - type as described in AAPCS64 \S 4.3. This includes aggregate, union and - array types. The C99 floating-point complex types are also considered -@@ -7851,6 +8209,9 @@ static bool - aarch64_composite_type_p (const_tree type, - machine_mode mode) - { -+ if (aarch64_short_vector_p (type, mode)) -+ return false; ++/* Parse TOKEN, which has length LENGTH to see if it is a tuning option ++ we understand. If it is, extract the option string and handoff to ++ the appropriate function. */ + - if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)) ++void ++aarch64_parse_one_override_token (const char* token, ++ size_t length, ++ struct tune_params *tune) ++{ ++ const struct aarch64_tuning_override_function *fn ++ = aarch64_tuning_override_functions; ++ ++ const char *option_part = strchr (token, '='); ++ if (!option_part) ++ { ++ error ("tuning string missing in option (%s)", token); ++ return; ++ } ++ ++ /* Get the length of the option name. */ ++ length = option_part - token; ++ /* Skip the '=' to get to the option string. 
*/ ++ option_part++; ++ ++ for (; fn->name != NULL; fn++) ++ { ++ if (!strncmp (fn->name, token, length)) ++ { ++ fn->parse_override (option_part, tune); ++ return; ++ } ++ } ++ ++ error ("unknown tuning option (%s)",token); ++ return; ++} ++ ++/* Parse STRING looking for options in the format: ++ string :: option:string ++ option :: name=substring ++ name :: {a-z} ++ substring :: defined by option. */ ++ ++static void ++aarch64_parse_override_string (const char* input_string, ++ struct tune_params* tune) ++{ ++ const char separator = ':'; ++ size_t string_length = strlen (input_string) + 1; ++ char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length); ++ char *string = string_root; ++ strncpy (string, input_string, string_length); ++ string[string_length - 1] = '\0'; ++ ++ char* ntoken = string; ++ ++ while ((ntoken = strchr (string, separator))) ++ { ++ size_t token_length = ntoken - string; ++ /* Make this substring look like a string. */ ++ *ntoken = '\0'; ++ aarch64_parse_one_override_token (string, token_length, tune); ++ string = ++ntoken; ++ } ++ ++ /* One last option to parse. */ ++ aarch64_parse_one_override_token (string, strlen (string), tune); ++ free (string_root); ++} ++ ++/* Implement TARGET_OPTION_OVERRIDE. */ + +-/* Implement TARGET_OPTION_OVERRIDE. */ +- + static void + aarch64_override_options (void) + { +@@ -6872,9 +7475,15 @@ aarch64_override_options (void) + + aarch64_tune_flags = selected_tune->flags; + aarch64_tune = selected_tune->core; +- aarch64_tune_params = selected_tune->tune; ++ /* Make a copy of the tuning parameters attached to the core, which ++ we may later overwrite. */ ++ aarch64_tune_params = *(selected_tune->tune); + aarch64_architecture_version = selected_cpu->architecture_version; + ++ if (aarch64_override_tune_string) ++ aarch64_parse_override_string (aarch64_override_tune_string, ++ &aarch64_tune_params); ++ + if (aarch64_fix_a53_err835769 == 2) + { + #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +@@ -6884,6 +7493,8 @@ aarch64_override_options (void) + #endif + } + ++ aarch64_register_fma_steering (); ++ + aarch64_override_options_after_change (); + } + +@@ -6902,11 +7513,11 @@ aarch64_override_options_after_change (void) + if (!optimize_size) + { + if (align_loops <= 0) +- align_loops = aarch64_tune_params->loop_align; ++ align_loops = aarch64_tune_params.loop_align; + if (align_jumps <= 0) +- align_jumps = aarch64_tune_params->jump_align; ++ align_jumps = aarch64_tune_params.jump_align; + if (align_functions <= 0) +- align_functions = aarch64_tune_params->function_align; ++ align_functions = aarch64_tune_params.function_align; + } + } + +@@ -7104,16 +7715,13 @@ aarch64_valid_floating_const (machine_mode mode, rtx x) + if (!CONST_DOUBLE_P (x)) + return false; + +- /* TODO: We could handle moving 0.0 to a TFmode register, +- but first we would like to refactor the movtf_aarch64 +- to be more amicable to split moves properly and +- correctly gate on TARGET_SIMD. For now - reject all +- constants which are not to SFmode or DFmode registers. */ ++ if (aarch64_float_const_zero_rtx_p (x)) ++ return true; ++ ++ /* We only handle moving 0.0 to a TFmode register. 
*/ + if (!(mode == SFmode || mode == DFmode)) + return false; + +- if (aarch64_float_const_zero_rtx_p (x)) +- return true; + return aarch64_float_const_representable_p (x); + } + +@@ -7247,9 +7855,7 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) + + if (!TARGET_FLOAT) + { +- if (cum->aapcs_nvrn > 0) +- sorry ("%qs and floating point or vector arguments", +- "-mgeneral-regs-only"); ++ gcc_assert (cum->aapcs_nvrn == 0); + vr_save_area_size = 0; + } + +@@ -7356,8 +7962,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + { + /* TYPE passed in fp/simd registers. */ + if (!TARGET_FLOAT) +- sorry ("%qs and floating point or vector arguments", +- "-mgeneral-regs-only"); ++ aarch64_err_no_fpadvsimd (mode, "varargs"); + + f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), + unshare_expr (valist), f_vrtop, NULL_TREE); +@@ -7594,9 +8199,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, + + if (!TARGET_FLOAT) + { +- if (local_cum.aapcs_nvrn > 0) +- sorry ("%qs and floating point or vector arguments", +- "-mgeneral-regs-only"); ++ gcc_assert (local_cum.aapcs_nvrn == 0); + vr_saved = 0; + } + +@@ -7830,6 +8433,26 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) + return -1; + } + ++/* Return TRUE if the type, as described by TYPE and MODE, is a short vector ++ type as described in AAPCS64 \S 4.1.2. ++ ++ See the comment above aarch64_composite_type_p for the notes on MODE. */ ++ ++static bool ++aarch64_short_vector_p (const_tree type, ++ machine_mode mode) ++{ ++ HOST_WIDE_INT size = -1; ++ ++ if (type && TREE_CODE (type) == VECTOR_TYPE) ++ size = int_size_in_bytes (type); ++ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) ++ size = GET_MODE_SIZE (mode); ++ ++ return (size == 8 || size == 16); ++} ++ + /* Return TRUE if the type, as described by TYPE and MODE, is a composite + type as described in AAPCS64 \S 4.3. This includes aggregate, union and + array types. The C99 floating-point complex types are also considered +@@ -7851,6 +8474,9 @@ static bool + aarch64_composite_type_p (const_tree type, + machine_mode mode) + { ++ if (aarch64_short_vector_p (type, mode)) ++ return false; ++ + if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)) return true; -@@ -7862,27 +8223,6 @@ aarch64_composite_type_p (const_tree type, +@@ -7862,27 +8488,6 @@ aarch64_composite_type_p (const_tree type, return false; } @@ -1675,7 +2953,15 @@ /* Return TRUE if an argument, whose type is described by TYPE and MODE, shall be passed or returned in simd/fp register(s) (providing these parameter passing registers are available). -@@ -8581,24 +8921,6 @@ aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, +@@ -8575,30 +9180,12 @@ aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, + if (lane < low || lane >= high) + { + if (exp) +- error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1); ++ error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1); + else +- error ("lane %ld out of range %ld - %ld", lane, low, high - 1); ++ error ("lane %wd out of range %wd - %wd", lane, low, high - 1); } } @@ -1700,7 +2986,7 @@ /* Return TRUE if OP is a valid vector addressing mode. 
*/ bool aarch64_simd_mem_operand_p (rtx op) -@@ -8781,22 +9103,19 @@ aarch64_expand_vector_init (rtx target, rtx vals) +@@ -8781,22 +9368,19 @@ aarch64_expand_vector_init (rtx target, rtx vals) machine_mode mode = GET_MODE (target); machine_mode inner_mode = GET_MODE_INNER (mode); int n_elts = GET_MODE_NUNITS (mode); @@ -1731,7 +3017,7 @@ all_same = false; } -@@ -8813,36 +9132,60 @@ aarch64_expand_vector_init (rtx target, rtx vals) +@@ -8813,36 +9397,60 @@ aarch64_expand_vector_init (rtx target, rtx vals) /* Splat a single non-constant element if we can. */ if (all_same) { @@ -1807,1411 +3093,9556 @@ emit_move_insn (adjust_address_nv (mem, inner_mode, i * GET_MODE_SIZE (inner_mode)), XVECEXP (vals, 0, i)); ---- a/src/gcc/config/aarch64/aarch64.h -+++ b/src/gcc/config/aarch64/aarch64.h -@@ -506,7 +506,7 @@ enum reg_class - - enum target_cpus +@@ -9015,7 +9623,23 @@ aarch64_expand_compare_and_swap (rtx operands[]) { --#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \ -+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ - TARGET_CPU_##INTERNAL_IDENT, - #include "aarch64-cores.def" - #undef AARCH64_CORE -@@ -823,7 +823,8 @@ do { \ - #define TRAMPOLINE_SECTION text_section + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + machine_mode mode, cmp_mode; +- rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ int idx; ++ gen_cas_fn gen; ++ const gen_cas_fn split_cas[] = ++ { ++ gen_aarch64_compare_and_swapqi, ++ gen_aarch64_compare_and_swaphi, ++ gen_aarch64_compare_and_swapsi, ++ gen_aarch64_compare_and_swapdi ++ }; ++ const gen_cas_fn atomic_cas[] = ++ { ++ gen_aarch64_compare_and_swapqi_lse, ++ gen_aarch64_compare_and_swaphi_lse, ++ gen_aarch64_compare_and_swapsi_lse, ++ gen_aarch64_compare_and_swapdi_lse ++ }; + + bval = operands[0]; + rval = operands[1]; +@@ -9060,13 +9684,17 @@ aarch64_expand_compare_and_swap (rtx operands[]) - /* To start with. */ --#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2 -+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \ -+ (aarch64_branch_cost (SPEED_P, PREDICTABLE_P)) - + switch (mode) + { +- case QImode: gen = gen_atomic_compare_and_swapqi_1; break; +- case HImode: gen = gen_atomic_compare_and_swaphi_1; break; +- case SImode: gen = gen_atomic_compare_and_swapsi_1; break; +- case DImode: gen = gen_atomic_compare_and_swapdi_1; break; ++ case QImode: idx = 0; break; ++ case HImode: idx = 1; break; ++ case SImode: idx = 2; break; ++ case DImode: idx = 3; break; + default: + gcc_unreachable (); + } ++ if (TARGET_LSE) ++ gen = atomic_cas[idx]; ++ else ++ gen = split_cas[idx]; - /* Assembly output. */ -@@ -929,11 +930,24 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); - #define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ - { "rewrite_mcpu", aarch64_rewrite_mcpu }, + emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); -+#if defined(__aarch64__) -+extern const char *host_detect_local_cpu (int argc, const char **argv); -+# define EXTRA_SPEC_FUNCTIONS \ -+ { "local_cpu_detect", host_detect_local_cpu }, \ -+ BIG_LITTLE_CPU_SPEC_FUNCTIONS +@@ -9078,6 +9706,32 @@ aarch64_expand_compare_and_swap (rtx operands[]) + emit_insn (gen_rtx_SET (VOIDmode, bval, x)); + } + ++/* Test whether the target supports using a atomic load-operate instruction. ++ CODE is the operation and AFTER is TRUE if the data in memory after the ++ operation should be returned and FALSE if the data before the operation ++ should be returned. 
Returns FALSE if the operation isn't supported by the ++ architecture. */ + -+# define MCPU_MTUNE_NATIVE_SPECS \ -+ " %{march=native:%mode) : operands[0]); - HOST_WIDE_INT imm = INTVAL (operands[2]); +@@ -9095,6 +9749,42 @@ aarch64_emit_post_barrier (enum memmodel model) + } + } -- if (imm < 0) -- imm = -(-imm & ~0xfff); -+ if (aarch64_move_imm (imm, mode) && can_create_pseudo_p ()) -+ { -+ rtx tmp = gen_reg_rtx (mode); -+ emit_move_insn (tmp, operands[2]); -+ operands[2] = tmp; -+ } - else -- imm &= ~0xfff; -- -- emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); -- operands[1] = subtarget; -- operands[2] = GEN_INT (INTVAL (operands[2]) - imm); -+ { -+ rtx subtarget = ((optimize && can_create_pseudo_p ()) -+ ? gen_reg_rtx (mode) : operands[0]); ++/* Emit an atomic compare-and-swap operation. RVAL is the destination register ++ for the data in memory. EXPECTED is the value expected to be in memory. ++ DESIRED is the value to store to memory. MEM is the memory location. MODEL ++ is the memory ordering to use. */ + -+ if (imm < 0) -+ imm = -(-imm & ~0xfff); -+ else -+ imm &= ~0xfff; ++void ++aarch64_gen_atomic_cas (rtx rval, rtx mem, ++ rtx expected, rtx desired, ++ rtx model) ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ machine_mode mode; + -+ emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); -+ operands[1] = subtarget; -+ operands[2] = GEN_INT (INTVAL (operands[2]) - imm); -+ } - } - " - ) -@@ -1529,6 +1539,38 @@ - [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] - ) - -+(define_insn "*adds_shift_imm_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (plus:GPI (ASHIFT:GPI -+ (match_operand:GPI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_" "n")) -+ (match_operand:GPI 3 "register_operand" "r")) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=r") -+ (plus:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2)) -+ (match_dup 3)))] -+ "" -+ "adds\\t%0, %3, %1, %2" -+ [(set_attr "type" "alus_shift_imm")] -+) ++ mode = GET_MODE (mem); + -+(define_insn "*subs_shift_imm_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (minus:GPI (match_operand:GPI 1 "register_operand" "r") -+ (ASHIFT:GPI -+ (match_operand:GPI 2 "register_operand" "r") -+ (match_operand:QI 3 "aarch64_shift_imm_" "n"))) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=r") -+ (minus:GPI (match_dup 1) -+ (ASHIFT:GPI (match_dup 2) (match_dup 3))))] -+ "" -+ "subs\\t%0, %1, %2, %3" -+ [(set_attr "type" "alus_shift_imm")] -+) ++ switch (mode) ++ { ++ case QImode: gen = gen_aarch64_atomic_casqi; break; ++ case HImode: gen = gen_aarch64_atomic_cashi; break; ++ case SImode: gen = gen_aarch64_atomic_cassi; break; ++ case DImode: gen = gen_aarch64_atomic_casdi; break; ++ default: ++ gcc_unreachable (); ++ } + - (define_insn "*adds_mul_imm_" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ -@@ -1589,6 +1631,42 @@ - [(set_attr "type" "alus_ext")] - ) ++ /* Move the expected value into the CAS destination register. */ ++ emit_insn (gen_rtx_SET (VOIDmode, rval, expected)); ++ ++ /* Emit the CAS. */ ++ emit_insn (gen (rval, mem, desired, model)); ++ ++ /* Compare the expected value with the value loaded by the CAS, to establish ++ whether the swap was made. */ ++ aarch64_gen_compare_reg (EQ, rval, expected); ++} ++ + /* Split a compare and swap pattern. 
*/ -+(define_insn "*adds__shift_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (plus:GPI (ashift:GPI -+ (ANY_EXTEND:GPI -+ (match_operand:ALLX 1 "register_operand" "r")) -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand:GPI 3 "register_operand" "r")) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=rk") -+ (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1)) -+ (match_dup 2)) -+ (match_dup 3)))] -+ "" -+ "adds\\t%0, %3, %1, xt %2" -+ [(set_attr "type" "alus_ext")] -+) + void +@@ -9163,11 +9853,257 @@ aarch64_split_compare_and_swap (rtx operands[]) + aarch64_emit_post_barrier (model); + } + ++/* Emit a BIC instruction. */ + -+(define_insn "*subs__shift_" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (minus:GPI (match_operand:GPI 1 "register_operand" "r") -+ (ashift:GPI -+ (ANY_EXTEND:GPI -+ (match_operand:ALLX 2 "register_operand" "r")) -+ (match_operand 3 "aarch64_imm3" "Ui3"))) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=rk") -+ (minus:GPI (match_dup 1) -+ (ashift:GPI (ANY_EXTEND:GPI (match_dup 2)) -+ (match_dup 3))))] -+ "" -+ "subs\\t%0, %1, %2, xt %3" -+ [(set_attr "type" "alus_ext")] -+) ++static void ++aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift) ++{ ++ rtx shift_rtx = GEN_INT (shift); ++ rtx (*gen) (rtx, rtx, rtx, rtx); + - (define_insn "*adds__multp2" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ -@@ -1884,6 +1962,38 @@ - [(set_attr "type" "adc_reg")] - ) - -+(define_insn "*add_uxt_shift2" -+ [(set (match_operand:GPI 0 "register_operand" "=rk") -+ (plus:GPI (and:GPI -+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n")) -+ (match_operand:GPI 4 "register_operand" "r")))] -+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL(operands[2]), -+ INTVAL (operands[3]))); -+ return \"add\t%0, %4, %1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++ switch (mode) ++ { ++ case SImode: gen = gen_and_one_cmpl_lshrsi3; break; ++ case DImode: gen = gen_and_one_cmpl_lshrdi3; break; ++ default: ++ gcc_unreachable (); ++ } + -+;; zero_extend version of above -+(define_insn "*add_uxtsi_shift2_uxtw" -+ [(set (match_operand:DI 0 "register_operand" "=rk") -+ (zero_extend:DI -+ (plus:SI (and:SI -+ (ashift:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n")) -+ (match_operand:SI 4 "register_operand" "r"))))] -+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), -+ INTVAL (operands[3]))); -+ return \"add\t%w0, %w4, %w1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++ emit_insn (gen (dst, s2, shift_rtx, s1)); ++} + - (define_insn "*add_uxt_multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (plus:GPI (and:GPI -@@ -2140,6 +2250,38 @@ - [(set_attr "type" "adc_reg")] - ) - -+(define_insn "*sub_uxt_shift2" -+ [(set (match_operand:GPI 0 "register_operand" "=rk") -+ (minus:GPI (match_operand:GPI 4 "register_operand" "rk") -+ (and:GPI -+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n"))))] -+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), -+ INTVAL (operands[3]))); -+ 
return \"sub\t%0, %4, %1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++/* Emit an atomic swap. */ + -+;; zero_extend version of above -+(define_insn "*sub_uxtsi_shift2_uxtw" -+ [(set (match_operand:DI 0 "register_operand" "=rk") -+ (zero_extend:DI -+ (minus:SI (match_operand:SI 4 "register_operand" "rk") -+ (and:SI -+ (ashift:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_imm3" "Ui3")) -+ (match_operand 3 "const_int_operand" "n")))))] -+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" -+ "* -+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), -+ INTVAL (operands[3]))); -+ return \"sub\t%w0, %w4, %w1, uxt%e3 %2\";" -+ [(set_attr "type" "alu_ext")] -+) ++static void ++aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value, ++ rtx mem, rtx model) ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); + - (define_insn "*sub_uxt_multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (minus:GPI (match_operand:GPI 4 "register_operand" "rk") -@@ -3058,6 +3200,26 @@ - (set_attr "simd" "*,yes")] - ) - -+(define_insn "*_one_cmplsidi3_ze" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (NLOGICAL:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r"))))] -+ "" -+ "\\t%w0, %w2, %w1" -+ [(set_attr "type" "logic_reg")] -+) ++ switch (mode) ++ { ++ case QImode: gen = gen_aarch64_atomic_swpqi; break; ++ case HImode: gen = gen_aarch64_atomic_swphi; break; ++ case SImode: gen = gen_aarch64_atomic_swpsi; break; ++ case DImode: gen = gen_aarch64_atomic_swpdi; break; ++ default: ++ gcc_unreachable (); ++ } + -+(define_insn "*xor_one_cmplsidi3_ze" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")))))] -+ "" -+ "eon\\t%w0, %w1, %w2" -+ [(set_attr "type" "logic_reg")] -+) ++ emit_insn (gen (dst, mem, value, model)); ++} + - ;; (xor (not a) b) is simplify_rtx-ed down to (not (xor a b)). - ;; eon does not operate on SIMD registers so the vector variant must be split. - (define_insn_and_split "*xor_one_cmpl3" -@@ -3131,6 +3293,32 @@ - [(set_attr "type" "logics_shift_imm")] - ) - -+(define_insn "*eor_one_cmpl_3_alt" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (not:GPI (xor:GPI -+ (SHIFT:GPI -+ (match_operand:GPI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_" "n")) -+ (match_operand:GPI 3 "register_operand" "r"))))] -+ "" -+ "eon\\t%0, %3, %1, %2" -+ [(set_attr "type" "logic_shift_imm")] -+) ++/* Operations supported by aarch64_emit_atomic_load_op. */ + -+;; Zero-extend version of the above. -+(define_insn "*eor_one_cmpl_sidi3_alt_ze" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (not:SI (xor:SI -+ (SHIFT:SI -+ (match_operand:SI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) -+ (match_operand:SI 3 "register_operand" "r")))))] -+ "" -+ "eon\\t%w0, %w3, %w1, %2" -+ [(set_attr "type" "logic_shift_imm")] -+) ++enum aarch64_atomic_load_op_code ++{ ++ AARCH64_LDOP_PLUS, /* A + B */ ++ AARCH64_LDOP_XOR, /* A ^ B */ ++ AARCH64_LDOP_OR, /* A | B */ ++ AARCH64_LDOP_BIC /* A & ~B */ ++}; + - (define_insn "*and_one_cmpl_3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ -@@ -3551,6 +3739,21 @@ - [(set_attr "type" "shift_imm")] - ) - -+;; There are no canonicalisation rules for ashift and lshiftrt inside an ior -+;; so we have to match both orderings. 
-+(define_insn "*extr5_insn_alt" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r") -+ (match_operand 4 "const_int_operand" "n")) -+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (match_operand 3 "const_int_operand" "n"))))] -+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (mode) -+ && (UINTVAL (operands[3]) + UINTVAL (operands[4]) -+ == GET_MODE_BITSIZE (mode))" -+ "extr\\t%0, %1, %2, %4" -+ [(set_attr "type" "shift_imm")] -+) ++/* Emit an atomic load-operate. */ + - ;; zero_extend version of the above - (define_insn "*extrsi5_insn_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") -@@ -3565,6 +3768,19 @@ - [(set_attr "type" "shift_imm")] - ) - -+(define_insn "*extrsi5_insn_uxtw_alt" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (ior:SI (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") -+ (match_operand 4 "const_int_operand" "n")) -+ (ashift:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand 3 "const_int_operand" "n")))))] -+ "UINTVAL (operands[3]) < 32 && -+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" -+ "extr\\t%w0, %w1, %w2, %4" -+ [(set_attr "type" "shift_imm")] -+) ++static void ++aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code, ++ machine_mode mode, rtx dst, rtx src, ++ rtx mem, rtx model) ++{ ++ typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx); ++ const aarch64_atomic_load_op_fn plus[] = ++ { ++ gen_aarch64_atomic_loadaddqi, ++ gen_aarch64_atomic_loadaddhi, ++ gen_aarch64_atomic_loadaddsi, ++ gen_aarch64_atomic_loadadddi ++ }; ++ const aarch64_atomic_load_op_fn eor[] = ++ { ++ gen_aarch64_atomic_loadeorqi, ++ gen_aarch64_atomic_loadeorhi, ++ gen_aarch64_atomic_loadeorsi, ++ gen_aarch64_atomic_loadeordi ++ }; ++ const aarch64_atomic_load_op_fn ior[] = ++ { ++ gen_aarch64_atomic_loadsetqi, ++ gen_aarch64_atomic_loadsethi, ++ gen_aarch64_atomic_loadsetsi, ++ gen_aarch64_atomic_loadsetdi ++ }; ++ const aarch64_atomic_load_op_fn bic[] = ++ { ++ gen_aarch64_atomic_loadclrqi, ++ gen_aarch64_atomic_loadclrhi, ++ gen_aarch64_atomic_loadclrsi, ++ gen_aarch64_atomic_loadclrdi ++ }; ++ aarch64_atomic_load_op_fn gen; ++ int idx = 0; + - (define_insn "*ror3_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (rotate:GPI (match_operand:GPI 1 "register_operand" "r") ---- a/src/gcc/config/aarch64/arm_neon.h -+++ b/src/gcc/config/aarch64/arm_neon.h -@@ -5665,8 +5665,6 @@ vaddlvq_u32 (uint32x4_t a) - - /* vcvt_high_f32_f16 not supported */ - --static float32x2_t vdup_n_f32 (float32_t); -- - #define vcvt_n_f32_s32(a, b) \ - __extension__ \ - ({ \ -@@ -9824,272 +9822,6 @@ vrsqrtss_f32 (float32_t a, float32_t b) - result; \ - }) - --#define vst1_lane_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x2_t b_ = (b); \ -- float32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x1_t b_ = (b); \ -- float64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x8_t b_ = (b); \ -- poly8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x4_t b_ = (b); \ -- poly16_t * a_ = (a); \ -- __asm__ ("st1 
{%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x8_t b_ = (b); \ -- int8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x4_t b_ = (b); \ -- int16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x2_t b_ = (b); \ -- int32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x1_t b_ = (b); \ -- int64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) ++ switch (mode) ++ { ++ case QImode: idx = 0; break; ++ case HImode: idx = 1; break; ++ case SImode: idx = 2; break; ++ case DImode: idx = 3; break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (code) ++ { ++ case AARCH64_LDOP_PLUS: gen = plus[idx]; break; ++ case AARCH64_LDOP_XOR: gen = eor[idx]; break; ++ case AARCH64_LDOP_OR: gen = ior[idx]; break; ++ case AARCH64_LDOP_BIC: gen = bic[idx]; break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ emit_insn (gen (dst, mem, src, model)); ++} ++ ++/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the ++ location to store the data read from memory. OUT_RESULT is the location to ++ store the result of the operation. MEM is the memory location to read and ++ modify. MODEL_RTX is the memory ordering to use. VALUE is the second ++ operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can ++ be NULL. */ ++ ++void ++aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result, ++ rtx mem, rtx value, rtx model_rtx) ++{ ++ machine_mode mode = GET_MODE (mem); ++ machine_mode wmode = (mode == DImode ? DImode : SImode); ++ const bool short_mode = (mode < SImode); ++ aarch64_atomic_load_op_code ldop_code; ++ rtx src; ++ rtx x; ++ ++ if (out_data) ++ out_data = gen_lowpart (mode, out_data); ++ ++ if (out_result) ++ out_result = gen_lowpart (mode, out_result); ++ ++ /* Make sure the value is in a register, putting it into a destination ++ register if it needs to be manipulated. */ ++ if (!register_operand (value, mode) ++ || code == AND || code == MINUS) ++ { ++ src = out_result ? out_result : out_data; ++ emit_move_insn (src, gen_lowpart (mode, value)); ++ } ++ else ++ src = value; ++ gcc_assert (register_operand (src, mode)); ++ ++ /* Preprocess the data for the operation as necessary. If the operation is ++ a SET then emit a swap instruction and finish. */ ++ switch (code) ++ { ++ case SET: ++ aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx); ++ return; ++ ++ case MINUS: ++ /* Negate the value and treat it as a PLUS. */ ++ { ++ rtx neg_src; ++ ++ /* Resize the value if necessary. */ ++ if (short_mode) ++ src = gen_lowpart (wmode, src); ++ ++ neg_src = gen_rtx_NEG (wmode, src); ++ emit_insn (gen_rtx_SET (VOIDmode, src, neg_src)); ++ ++ if (short_mode) ++ src = gen_lowpart (mode, src); ++ } ++ /* Fall-through. */ ++ case PLUS: ++ ldop_code = AARCH64_LDOP_PLUS; ++ break; ++ ++ case IOR: ++ ldop_code = AARCH64_LDOP_OR; ++ break; ++ ++ case XOR: ++ ldop_code = AARCH64_LDOP_XOR; ++ break; ++ ++ case AND: ++ { ++ rtx not_src; ++ ++ /* Resize the value if necessary. 
*/ ++ if (short_mode) ++ src = gen_lowpart (wmode, src); ++ ++ not_src = gen_rtx_NOT (wmode, src); ++ emit_insn (gen_rtx_SET (VOIDmode, src, not_src)); ++ ++ if (short_mode) ++ src = gen_lowpart (mode, src); ++ } ++ ldop_code = AARCH64_LDOP_BIC; ++ break; ++ ++ default: ++ /* The operation can't be done with atomic instructions. */ ++ gcc_unreachable (); ++ } ++ ++ aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx); ++ ++ /* If necessary, calculate the data in memory after the update by redoing the ++ operation from values in registers. */ ++ if (!out_result) ++ return; ++ ++ if (short_mode) ++ { ++ src = gen_lowpart (wmode, src); ++ out_data = gen_lowpart (wmode, out_data); ++ out_result = gen_lowpart (wmode, out_result); ++ } ++ ++ x = NULL_RTX; ++ ++ switch (code) ++ { ++ case MINUS: ++ case PLUS: ++ x = gen_rtx_PLUS (wmode, out_data, src); ++ break; ++ case IOR: ++ x = gen_rtx_IOR (wmode, out_data, src); ++ break; ++ case XOR: ++ x = gen_rtx_XOR (wmode, out_data, src); ++ break; ++ case AND: ++ aarch64_emit_bic (wmode, out_result, out_data, src, 0); ++ return; ++ default: ++ gcc_unreachable (); ++ } ++ ++ emit_set_insn (out_result, x); ++ ++ return; ++} ++ + /* Split an atomic operation. */ + + void + aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, +- rtx value, rtx model_rtx, rtx cond) ++ rtx value, rtx model_rtx, rtx cond) + { + machine_mode mode = GET_MODE (mem); + machine_mode wmode = (mode == DImode ? DImode : SImode); +@@ -9176,6 +10112,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx_code_label *label; + rtx x; + ++ /* Split the atomic operation into a sequence. */ + label = gen_label_rtx (); + emit_label (label); + +@@ -10604,7 +11541,7 @@ aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code, + static bool + aarch64_macro_fusion_p (void) + { +- return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING; ++ return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING; + } + + +@@ -10624,7 +11561,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + return false; + + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK)) + { + /* We are trying to match: + prev (mov) == (set (reg r0) (const_int imm16)) +@@ -10649,7 +11586,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + } + + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD)) + { + + /* We're trying to match: +@@ -10675,7 +11612,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + } + + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK)) + { + + /* We're trying to match: +@@ -10704,7 +11641,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + + } + if (simple_sets_p +- && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR)) ++ && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR)) + { + /* We're trying to match: + prev (adrp) == (set (reg r0) +@@ -10735,7 +11672,7 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + } + } + +- if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH) ++ if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH) + && any_condjump_p (curr)) + { + enum attr_type prev_type = get_attr_type 
(prev); +--- a/src/gcc/config/aarch64/aarch64.h ++++ b/src/gcc/config/aarch64/aarch64.h +@@ -29,6 +29,10 @@ + builtin_define ("__aarch64__"); \ + builtin_define ("__ARM_64BIT_STATE"); \ + builtin_define_with_int_value \ ++ ("__ARM_ALIGN_MAX_PWR", 28); \ ++ builtin_define_with_int_value \ ++ ("__ARM_ALIGN_MAX_STACK_PWR", 16); \ ++ builtin_define_with_int_value \ + ("__ARM_ARCH", aarch64_architecture_version); \ + cpp_define_formatted \ + (parse_in, "__ARM_ARCH_%dA", aarch64_architecture_version); \ +@@ -198,8 +202,12 @@ extern unsigned aarch64_architecture_version; + #define AARCH64_FL_SIMD (1 << 0) /* Has SIMD instructions. */ + #define AARCH64_FL_FP (1 << 1) /* Has FP. */ + #define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ +-#define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */ +-#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */ ++#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ ++/* ARMv8.1 architecture extensions. */ ++#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ ++#define AARCH64_FL_PAN (1 << 5) /* Has Privileged Access Never. */ ++#define AARCH64_FL_LOR (1 << 6) /* Has Limited Ordering regions. */ ++#define AARCH64_FL_RDMA (1 << 7) /* Has ARMv8.1 Adv.SIMD. */ + + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) +@@ -209,6 +217,9 @@ extern unsigned aarch64_architecture_version; + + /* Architecture flags that effect instruction selection. */ + #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) ++#define AARCH64_FL_FOR_ARCH8_1 \ ++ (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_PAN \ ++ | AARCH64_FL_LOR | AARCH64_FL_RDMA) + + /* Macros to test ISA flags. */ + extern unsigned long aarch64_isa_flags; +@@ -216,10 +227,7 @@ extern unsigned long aarch64_isa_flags; + #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) + #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) +- +-/* Macros to test tuning flags. */ +-extern unsigned long aarch64_tune_flags; +-#define AARCH64_TUNE_SLOWMUL (aarch64_tune_flags & AARCH64_FL_SLOWMUL) ++#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + + /* Crypto is an optional extension to AdvSIMD. */ + #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) +@@ -227,6 +235,9 @@ extern unsigned long aarch64_tune_flags; + /* CRC instructions that can be enabled through +crc arch extension. */ + #define TARGET_CRC32 (AARCH64_ISA_CRC) + ++/* Atomic instructions that can be enabled through the +lse extension. */ ++#define TARGET_LSE (AARCH64_ISA_LSE) ++ + /* Standard register usage. */ + + /* 31 64-bit general purpose registers R0-R30: +@@ -506,7 +517,7 @@ enum reg_class + + enum target_cpus + { +-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \ ++#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ + TARGET_CPU_##INTERNAL_IDENT, + #include "aarch64-cores.def" + #undef AARCH64_CORE +@@ -823,7 +834,8 @@ do { \ + #define TRAMPOLINE_SECTION text_section + + /* To start with. */ +-#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2 ++#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \ ++ (aarch64_branch_cost (SPEED_P, PREDICTABLE_P)) + + + /* Assembly output. 
*/ +@@ -929,11 +941,24 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); + #define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ + { "rewrite_mcpu", aarch64_rewrite_mcpu }, + ++#if defined(__aarch64__) ++extern const char *host_detect_local_cpu (int argc, const char **argv); ++# define EXTRA_SPEC_FUNCTIONS \ ++ { "local_cpu_detect", host_detect_local_cpu }, \ ++ BIG_LITTLE_CPU_SPEC_FUNCTIONS ++ ++# define MCPU_MTUNE_NATIVE_SPECS \ ++ " %{march=native:%,neon_from_gp,neon_dup") + (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")] + ) +@@ -912,7 +912,7 @@ + DONE; + }" + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ +- adr,adr,f_mcr,f_mrc,fmov,fmov") ++ adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] + ) +@@ -979,23 +979,25 @@ + [(set (match_operand:GPF 0 "nonimmediate_operand" "") + (match_operand:GPF 1 "general_operand" ""))] + "" +- " ++ { + if (!TARGET_FLOAT) +- { +- sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); ++ { ++ aarch64_err_no_fpadvsimd (mode, "code"); + FAIL; +- } ++ } + +- if (GET_CODE (operands[0]) == MEM) ++ if (GET_CODE (operands[0]) == MEM ++ && ! (GET_CODE (operands[1]) == CONST_DOUBLE ++ && aarch64_float_const_zero_rtx_p (operands[1]))) + operands[1] = force_reg (mode, operands[1]); +- " ++ } + ) + + (define_insn "*movsf_aarch64" + [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], SFmode) +- || register_operand (operands[1], SFmode))" ++ || aarch64_reg_or_fp_zero (operands[1], SFmode))" + "@ + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 +@@ -1007,14 +1009,14 @@ + str\\t%w1, %0 + mov\\t%w0, %w1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ +- f_loads,f_stores,f_loads,f_stores,mov_reg")] ++ f_loads,f_stores,load1,store1,mov_reg")] + ) + + (define_insn "*movdf_aarch64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], DFmode) +- || register_operand (operands[1], DFmode))" ++ || aarch64_reg_or_fp_zero (operands[1], DFmode))" + "@ + fmov\\t%d0, %x1 + fmov\\t%x0, %d1 +@@ -1026,32 +1028,34 @@ + str\\t%x1, %0 + mov\\t%x0, %x1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\ +- f_loadd,f_stored,f_loadd,f_stored,mov_reg")] ++ f_loadd,f_stored,load1,store1,mov_reg")] + ) + + (define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "" +- " ++ { + if (!TARGET_FLOAT) +- { +- sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); ++ { ++ aarch64_err_no_fpadvsimd (TFmode, "code"); + FAIL; +- } ++ } + +- if (GET_CODE (operands[0]) == MEM) ++ if (GET_CODE (operands[0]) == MEM ++ && ! 
(GET_CODE (operands[1]) == CONST_DOUBLE ++ && aarch64_float_const_zero_rtx_p (operands[1]))) + operands[1] = force_reg (TFmode, operands[1]); +- " ++ } + ) + + (define_insn "*movtf_aarch64" + [(set (match_operand:TF 0 +- "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump") ++ "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump") + (match_operand:TF 1 +- "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,Ump,?rY"))] ++ "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))] + "TARGET_FLOAT && (register_operand (operands[0], TFmode) +- || register_operand (operands[1], TFmode))" ++ || aarch64_reg_or_fp_zero (operands[1], TFmode))" + "@ + orr\\t%0.16b, %1.16b, %1.16b + # +@@ -1062,12 +1066,13 @@ + ldr\\t%q0, %1 + str\\t%q1, %0 + ldp\\t%0, %H0, %1 +- stp\\t%1, %H1, %0" +- [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\ +- f_loadd,f_stored,neon_load1_2reg,neon_store1_2reg") +- (set_attr "length" "4,8,8,8,4,4,4,4,4,4") +- (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*") +- (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")] ++ stp\\t%1, %H1, %0 ++ stp\\txzr, xzr, %0" ++ [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,fconstd,\ ++ f_loadd,f_stored,load2,store2,store2") ++ (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") ++ (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*,*") ++ (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")] + ) + + (define_split +@@ -1414,18 +1419,28 @@ + " + if (! aarch64_plus_operand (operands[2], VOIDmode)) + { +- rtx subtarget = ((optimize && can_create_pseudo_p ()) +- ? gen_reg_rtx (mode) : operands[0]); + HOST_WIDE_INT imm = INTVAL (operands[2]); + +- if (imm < 0) +- imm = -(-imm & ~0xfff); ++ if (aarch64_move_imm (imm, mode) && can_create_pseudo_p ()) ++ { ++ rtx tmp = gen_reg_rtx (mode); ++ emit_move_insn (tmp, operands[2]); ++ operands[2] = tmp; ++ } + else +- imm &= ~0xfff; - --#define vst1_lane_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x4_t b_ = (b); \ -- uint16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x2_t b_ = (b); \ -- uint32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1_lane_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x1_t b_ = (b); \ -- uint64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- -- --#define vst1q_lane_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x4_t b_ = (b); \ -- float32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x2_t b_ = (b); \ -- float64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x16_t b_ = (b); \ -- poly8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x8_t b_ = (b); \ -- poly16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x16_t b_ = (b); \ -- int8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : 
"memory"); \ -- }) -- --#define vst1q_lane_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u8(a, b, c) \ -- __extension__ \ -- ({ \ -- uint8x16_t b_ = (b); \ -- uint8_t * a_ = (a); \ -- __asm__ ("st1 {%1.b}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint16_t * a_ = (a); \ -- __asm__ ("st1 {%1.h}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint32_t * a_ = (a); \ -- __asm__ ("st1 {%1.s}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- --#define vst1q_lane_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint64_t * a_ = (a); \ -- __asm__ ("st1 {%1.d}[%2],[%0]" \ -- : \ -- : "r"(a_), "w"(b_), "i"(c) \ -- : "memory"); \ -- }) -- -- - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_p8 (poly8x8_t a, poly8x8_t b) - { -@@ -11668,25 +11400,25 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) - - /* vaes */ - --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaeseq_u8 (uint8x16_t data, uint8x16_t key) - { - return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); - } - --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaesdq_u8 (uint8x16_t data, uint8x16_t key) - { - return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); - } - --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaesmcq_u8 (uint8x16_t data) - { - return __builtin_aarch64_crypto_aesmcv16qi_uu (data); - } +- emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); +- operands[1] = subtarget; +- operands[2] = GEN_INT (INTVAL (operands[2]) - imm); ++ { ++ rtx subtarget = ((optimize && can_create_pseudo_p ()) ++ ? gen_reg_rtx (mode) : operands[0]); ++ ++ if (imm < 0) ++ imm = -(-imm & ~0xfff); ++ else ++ imm &= ~0xfff; ++ ++ emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); ++ operands[1] = subtarget; ++ operands[2] = GEN_INT (INTVAL (operands[2]) - imm); ++ } + } + " + ) +@@ -1529,6 +1544,38 @@ + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] + ) --static __inline uint8x16_t -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaesimcq_u8 (uint8x16_t data) - { - return __builtin_aarch64_crypto_aesimcv16qi_uu (data); -@@ -11887,7 +11619,7 @@ vceq_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceq_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] == __b[0] ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a == __b); - } ++(define_insn "*adds_shift_imm_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (plus:GPI (ASHIFT:GPI ++ (match_operand:GPI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_" "n")) ++ (match_operand:GPI 3 "register_operand" "r")) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=r") ++ (plus:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2)) ++ (match_dup 3)))] ++ "" ++ "adds\\t%0, %3, %1, %2" ++ [(set_attr "type" "alus_shift_imm")] ++) ++ ++(define_insn "*subs_shift_imm_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (minus:GPI (match_operand:GPI 1 "register_operand" "r") ++ (ASHIFT:GPI ++ (match_operand:GPI 2 "register_operand" "r") ++ (match_operand:QI 3 "aarch64_shift_imm_" "n"))) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=r") ++ (minus:GPI (match_dup 1) ++ (ASHIFT:GPI (match_dup 2) (match_dup 3))))] ++ "" ++ "subs\\t%0, %1, %2, %3" ++ [(set_attr "type" "alus_shift_imm")] ++) ++ + (define_insn "*adds_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ +@@ -1589,6 +1636,42 @@ + [(set_attr "type" "alus_ext")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -11911,7 +11643,7 @@ vceq_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceq_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll}; -+ return (__a == __b); - } ++(define_insn "*adds__shift_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (plus:GPI (ashift:GPI ++ (ANY_EXTEND:GPI ++ (match_operand:ALLX 1 "register_operand" "r")) ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand:GPI 3 "register_operand" "r")) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=rk") ++ (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1)) ++ (match_dup 2)) ++ (match_dup 3)))] ++ "" ++ "adds\\t%0, %3, %1, xt %2" ++ [(set_attr "type" "alus_ext")] ++) ++ ++(define_insn "*subs__shift_" ++ [(set (reg:CC_NZ CC_REGNUM) ++ (compare:CC_NZ ++ (minus:GPI (match_operand:GPI 1 "register_operand" "r") ++ (ashift:GPI ++ (ANY_EXTEND:GPI ++ (match_operand:ALLX 2 "register_operand" "r")) ++ (match_operand 3 "aarch64_imm3" "Ui3"))) ++ (const_int 0))) ++ (set (match_operand:GPI 0 "register_operand" "=rk") ++ (minus:GPI (match_dup 1) ++ (ashift:GPI (ANY_EXTEND:GPI (match_dup 2)) ++ (match_dup 3))))] ++ "" ++ "subs\\t%0, %1, %2, xt %3" ++ [(set_attr "type" "alus_ext")] ++) ++ + (define_insn "*adds__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ +@@ -1884,6 +1967,38 @@ + [(set_attr "type" "adc_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12047,7 +11779,7 @@ vceqz_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceqz_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] == 0ll ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); - } ++(define_insn "*add_uxt_shift2" ++ [(set (match_operand:GPI 0 "register_operand" "=rk") ++ (plus:GPI (and:GPI ++ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n")) ++ (match_operand:GPI 4 "register_operand" "r")))] ++ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL(operands[2]), ++ INTVAL (operands[3]))); ++ return \"add\t%0, %4, %1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ ++;; zero_extend version of above ++(define_insn "*add_uxtsi_shift2_uxtw" ++ [(set (match_operand:DI 0 "register_operand" "=rk") ++ (zero_extend:DI ++ (plus:SI (and:SI ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n")) ++ (match_operand:SI 4 "register_operand" "r"))))] ++ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), ++ INTVAL (operands[3]))); ++ return \"add\t%w0, %w4, %w1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ + (define_insn "*add_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (and:GPI +@@ -2140,6 +2255,38 @@ + [(set_attr "type" "adc_reg")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12071,7 +11803,7 @@ vceqz_u32 (uint32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vceqz_u64 (uint64x1_t __a) - { -- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll}; -+ return (__a == __AARCH64_UINT64_C (0)); - } ++(define_insn "*sub_uxt_shift2" ++ [(set (match_operand:GPI 0 "register_operand" "=rk") ++ (minus:GPI (match_operand:GPI 4 "register_operand" "rk") ++ (and:GPI ++ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n"))))] ++ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), ++ INTVAL (operands[3]))); ++ return \"sub\t%0, %4, %1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ ++;; zero_extend version of above ++(define_insn "*sub_uxtsi_shift2_uxtw" ++ [(set (match_operand:DI 0 "register_operand" "=rk") ++ (zero_extend:DI ++ (minus:SI (match_operand:SI 4 "register_operand" "rk") ++ (and:SI ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_imm3" "Ui3")) ++ (match_operand 3 "const_int_operand" "n")))))] ++ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0" ++ "* ++ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), ++ INTVAL (operands[3]))); ++ return \"sub\t%w0, %w4, %w1, uxt%e3 %2\";" ++ [(set_attr "type" "alu_ext")] ++) ++ + (define_insn "*sub_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 4 "register_operand" "rk") +@@ -2172,35 +2319,16 @@ + [(set_attr "type" "alu_ext")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12201,7 +11933,7 @@ vcge_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcge_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] >= __b[0] ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a >= __b); - } +-(define_insn_and_split "absdi2" +- [(set (match_operand:DI 0 "register_operand" "=&r,w") +- (abs:DI (match_operand:DI 1 "register_operand" "r,w")))] ++(define_expand "abs2" ++ [(match_operand:GPI 0 "register_operand" "") ++ (match_operand:GPI 1 "register_operand" "")] + "" +- "@ +- # +- abs\\t%d0, %d1" +- "reload_completed +- && GP_REGNUM_P (REGNO (operands[0])) +- && GP_REGNUM_P (REGNO (operands[1]))" +- [(const_int 0)] + { +- emit_insn (gen_rtx_SET (VOIDmode, operands[0], +- gen_rtx_XOR (DImode, +- gen_rtx_ASHIFTRT (DImode, +- operands[1], +- GEN_INT (63)), +- operands[1]))); +- emit_insn (gen_rtx_SET (VOIDmode, +- operands[0], +- gen_rtx_MINUS (DImode, +- operands[0], +- gen_rtx_ASHIFTRT (DImode, +- operands[1], +- GEN_INT (63))))); ++ rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); ++ rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx); ++ emit_insn (gen_csneg3_insn (operands[0], x, operands[1], operands[1])); + DONE; + } +- [(set_attr "type" "alu_sreg") +- (set_attr "simd" "no,yes")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12225,7 +11957,7 @@ vcge_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcge_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll}; -+ return (__a >= __b); - } + (define_insn "neg2" +@@ -2852,7 +2980,7 @@ + (plus:GPI (match_operand 2 "aarch64_comparison_operation" "") + (match_operand:GPI 1 "register_operand" "r")))] + "" +- "csinc\\t%0, %1, %1, %M2" ++ "cinc\\t%0, %1, %m2" + [(set_attr "type" "csel")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12349,7 +12081,7 @@ vcgez_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgez_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll}; -+ return (uint64x1_t) (__a >= __AARCH64_INT64_C (0)); - } +@@ -2879,7 +3007,7 @@ + [(set_attr "type" "csel")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12443,7 +12175,7 @@ vcgt_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgt_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll); -+ return (uint64x1_t) (__a > __b); - } +-(define_insn "*csneg3_insn" ++(define_insn "csneg3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operand 1 "aarch64_comparison_operation" "") +@@ -3058,6 +3186,26 @@ + (set_attr "simd" "*,yes")] + ) - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12467,7 +12199,7 @@ vcgt_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgt_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) (__a[0] > __b[0] ? 
-1ll : 0ll); -+ return (__a > __b); - } ++(define_insn "*_one_cmplsidi3_ze" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (NLOGICAL:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r"))))] ++ "" ++ "\\t%w0, %w2, %w1" ++ [(set_attr "type" "logic_reg")] ++) ++ ++(define_insn "*xor_one_cmplsidi3_ze" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))))] ++ "" ++ "eon\\t%w0, %w1, %w2" ++ [(set_attr "type" "logic_reg")] ++) ++ + ;; (xor (not a) b) is simplify_rtx-ed down to (not (xor a b)). + ;; eon does not operate on SIMD registers so the vector variant must be split. + (define_insn_and_split "*xor_one_cmpl3" +@@ -3119,7 +3267,7 @@ + [(set_attr "type" "logics_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12591,7 +12323,7 @@ vcgtz_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcgtz_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll}; -+ return (uint64x1_t) (__a > __AARCH64_INT64_C (0)); - } +-(define_insn "*_one_cmpl_3" ++(define_insn "_one_cmpl_3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (not:GPI + (SHIFT:GPI +@@ -3128,7 +3276,33 @@ + (match_operand:GPI 3 "register_operand" "r")))] + "" + "\\t%0, %3, %1, %2" +- [(set_attr "type" "logics_shift_imm")] ++ [(set_attr "type" "logic_shift_imm")] ++) ++ ++(define_insn "*eor_one_cmpl_3_alt" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (not:GPI (xor:GPI ++ (SHIFT:GPI ++ (match_operand:GPI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_" "n")) ++ (match_operand:GPI 3 "register_operand" "r"))))] ++ "" ++ "eon\\t%0, %3, %1, %2" ++ [(set_attr "type" "logic_shift_imm")] ++) ++ ++;; Zero-extend version of the above. ++(define_insn "*eor_one_cmpl_sidi3_alt_ze" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (not:SI (xor:SI ++ (SHIFT:SI ++ (match_operand:SI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) ++ (match_operand:SI 3 "register_operand" "r")))))] ++ "" ++ "eon\\t%w0, %w3, %w1, %2" ++ [(set_attr "type" "logic_shift_imm")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12685,7 +12417,7 @@ vcle_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcle_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll}; -+ return (uint64x1_t) (__a <= __b); - } + (define_insn "*and_one_cmpl_3_compare0" +@@ -3347,32 +3521,33 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12709,7 +12441,7 @@ vcle_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcle_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] <= __b[0] ? 
-1ll : 0ll}; -+ return (__a <= __b); - } + ;; Logical left shift using SISD or Integer instruction + (define_insn "*aarch64_ashl_sisd_or_int_3" +- [(set (match_operand:GPI 0 "register_operand" "=w,w,r") ++ [(set (match_operand:GPI 0 "register_operand" "=r,w,w") + (ashift:GPI +- (match_operand:GPI 1 "register_operand" "w,w,r") +- (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] ++ (match_operand:GPI 1 "register_operand" "r,w,w") ++ (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs,Us,w")))] + "" + "@ ++ lsl\t%0, %1, %2 + shl\t%0, %1, %2 +- ushl\t%0, %1, %2 +- lsl\t%0, %1, %2" +- [(set_attr "simd" "yes,yes,no") +- (set_attr "type" "neon_shift_imm, neon_shift_reg,shift_reg")] ++ ushl\t%0, %1, %2" ++ [(set_attr "simd" "no,yes,yes") ++ (set_attr "type" "shift_reg,neon_shift_imm, neon_shift_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12833,7 +12565,7 @@ vclez_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vclez_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll}; -+ return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -12927,7 +12659,7 @@ vclt_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vclt_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; -+ return (uint64x1_t) (__a < __b); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -12951,7 +12683,7 @@ vclt_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vclt_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; -+ return (__a < __b); - } + ;; Logical right shift using SISD or Integer instruction + (define_insn "*aarch64_lshr_sisd_or_int_3" +- [(set (match_operand:GPI 0 "register_operand" "=w,&w,r") ++ [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") + (lshiftrt:GPI +- (match_operand:GPI 1 "register_operand" "w,w,r") +- (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] ++ (match_operand:GPI 1 "register_operand" "r,w,w,w") ++ (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs,Us,w,0")))] + "" + "@ ++ lsr\t%0, %1, %2 + ushr\t%0, %1, %2 + # +- lsr\t%0, %1, %2" +- [(set_attr "simd" "yes,yes,no") +- (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] ++ #" ++ [(set_attr "simd" "no,yes,yes,yes") ++ (set_attr "type" "shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] + ) - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -13075,7 +12807,7 @@ vcltz_s32 (int32x2_t __a) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vcltz_s64 (int64x1_t __a) - { -- return (uint64x1_t) {__a[0] < 0ll ? 
-1ll : 0ll}; -+ return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); - } + (define_split +@@ -3407,18 +3582,18 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -@@ -21321,72 +21053,74 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) + ;; Arithmetic right shift using SISD or Integer instruction + (define_insn "*aarch64_ashr_sisd_or_int_3" +- [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r") ++ [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") + (ashiftrt:GPI +- (match_operand:GPI 1 "register_operand" "w,w,w,r") +- (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,0,rUs")))] ++ (match_operand:GPI 1 "register_operand" "r,w,w,w") ++ (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "rUs,Us,w,0")))] + "" + "@ ++ asr\t%0, %1, %2 + sshr\t%0, %1, %2 + # +- # +- asr\t%0, %1, %2" +- [(set_attr "simd" "yes,yes,yes,no") +- (set_attr "type" "neon_shift_imm,neon_shift_reg,neon_shift_reg,shift_reg")] ++ #" ++ [(set_attr "simd" "no,yes,yes,yes") ++ (set_attr "type" "shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] + ) - /* vsha1 */ + (define_split +@@ -3551,6 +3726,21 @@ + [(set_attr "type" "shift_imm")] + ) --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); - } --static __inline uint32x4_t -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); - } --static __inline uint32x4_t ++;; There are no canonicalisation rules for ashift and lshiftrt inside an ior ++;; so we have to match both orderings. 
++(define_insn "*extr5_insn_alt" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (ior:GPI (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r") ++ (match_operand 4 "const_int_operand" "n")) ++ (ashift:GPI (match_operand:GPI 1 "register_operand" "r") ++ (match_operand 3 "const_int_operand" "n"))))] ++ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (mode) ++ && (UINTVAL (operands[3]) + UINTVAL (operands[4]) ++ == GET_MODE_BITSIZE (mode))" ++ "extr\\t%0, %1, %2, %4" ++ [(set_attr "type" "shift_imm")] ++) + -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); - } - --static __inline uint32_t -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vsha1h_u32 (uint32_t hash_e) - { - return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) - { - return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) - { - return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) - { - return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) - { - return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); - } - --static __inline uint32x4_t -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) - { - return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); - } - --static __inline poly128_t -+__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) - vmull_p64 (poly64_t a, poly64_t b) - { - return - __builtin_aarch64_crypto_pmulldi_ppp (a, b); - } - --static __inline poly128_t -+__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) - vmull_high_p64 (poly64x2_t a, poly64x2_t b) - { - return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); -@@ -22302,6 +22036,8 @@ vst1_u64 (uint64_t *a, uint64x1_t b) - *a = b[0]; - } + ;; zero_extend version of the above + (define_insn "*extrsi5_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") +@@ -3565,6 +3755,19 @@ + [(set_attr "type" "shift_imm")] + ) -+/* vst1q */ ++(define_insn "*extrsi5_insn_uxtw_alt" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (ior:SI (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") ++ (match_operand 4 "const_int_operand" "n")) ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 3 "const_int_operand" "n")))))] ++ "UINTVAL 
(operands[3]) < 32 && ++ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" ++ "extr\\t%w0, %w1, %w2, %4" ++ [(set_attr "type" "shift_imm")] ++) + - __extension__ static __inline void __attribute__ ((__always_inline__)) - vst1q_f32 (float32_t *a, float32x4_t b) - { -@@ -22314,8 +22050,6 @@ vst1q_f64 (float64_t *a, float64x2_t b) - __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); - } + (define_insn "*ror3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (rotate:GPI (match_operand:GPI 1 "register_operand" "r") +@@ -3915,7 +4118,7 @@ + (define_insn "2" + [(set (match_operand:GPF 0 "register_operand" "=w,w") + (FLOATUORS:GPF (match_operand: 1 "register_operand" "w,r")))] +- "" ++ "TARGET_FLOAT" + "@ + cvtf\t%0, %1 + cvtf\t%0, %1" +--- a/src/gcc/config/aarch64/aarch64.opt ++++ b/src/gcc/config/aarch64/aarch64.opt +@@ -111,6 +111,10 @@ mabi= + Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI_DEFAULT) + -mabi=ABI Generate code that conforms to the specified ABI + ++moverride= ++Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) ++-moverride=STRING Power users only! Override CPU optimization parameters ++ + Enum + Name(aarch64_abi) Type(int) + Known AArch64 ABIs (for use with the -mabi= option): +--- a/src/gcc/config/aarch64/arm_neon.h ++++ b/src/gcc/config/aarch64/arm_neon.h +@@ -5665,8 +5665,6 @@ vaddlvq_u32 (uint32x4_t a) --/* vst1q */ + /* vcvt_high_f32_f16 not supported */ + +-static float32x2_t vdup_n_f32 (float32_t); - - __extension__ static __inline void __attribute__ ((__always_inline__)) - vst1q_p8 (poly8_t *a, poly8x16_t b) - { -@@ -22382,6 +22116,154 @@ vst1q_u64 (uint64_t *a, uint64x2_t b) - (int64x2_t) b); - } + #define vcvt_n_f32_s32(a, b) \ + __extension__ \ + ({ \ +@@ -9824,272 +9822,6 @@ vrsqrtss_f32 (float32_t a, float32_t b) + result; \ + }) -+/* vst1_lane */ -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) 
-+vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane) -+{ +-#define vst1_lane_f32(a, b, c) \ +- __extension__ \ +- ({ \ +- float32x2_t b_ = (b); \ +- float32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_f64(a, b, c) \ +- __extension__ \ +- ({ \ +- float64x1_t b_ = (b); \ +- float64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_p8(a, b, c) \ +- __extension__ \ +- ({ \ +- poly8x8_t b_ = (b); \ +- poly8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_p16(a, b, c) \ +- __extension__ \ +- ({ \ +- poly16x4_t b_ = (b); \ +- poly16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s8(a, b, c) \ +- __extension__ \ +- ({ \ +- int8x8_t b_ = (b); \ +- int8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s16(a, b, c) \ +- __extension__ \ +- ({ \ +- int16x4_t b_ = (b); \ +- int16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s32(a, b, c) \ +- __extension__ \ +- ({ \ +- int32x2_t b_ = (b); \ +- int32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_s64(a, b, c) \ +- __extension__ \ +- ({ \ +- int64x1_t b_ = (b); \ +- int64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u8(a, b, c) \ +- __extension__ \ +- ({ \ +- uint8x8_t b_ = (b); \ +- uint8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u16(a, b, c) \ +- __extension__ \ +- ({ \ +- uint16x4_t b_ = (b); \ +- uint16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u32(a, b, c) \ +- __extension__ \ +- ({ \ +- uint32x2_t b_ = (b); \ +- uint32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1_lane_u64(a, b, c) \ +- __extension__ \ +- ({ \ +- uint64x1_t b_ = (b); \ +- uint64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +- +-#define vst1q_lane_f32(a, b, c) \ +- __extension__ \ +- ({ \ +- float32x4_t b_ = (b); \ +- float32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_f64(a, b, c) \ +- __extension__ \ +- ({ \ +- float64x2_t b_ = (b); \ 
+- float64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_p8(a, b, c) \ +- __extension__ \ +- ({ \ +- poly8x16_t b_ = (b); \ +- poly8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_p16(a, b, c) \ +- __extension__ \ +- ({ \ +- poly16x8_t b_ = (b); \ +- poly16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s8(a, b, c) \ +- __extension__ \ +- ({ \ +- int8x16_t b_ = (b); \ +- int8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s16(a, b, c) \ +- __extension__ \ +- ({ \ +- int16x8_t b_ = (b); \ +- int16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s32(a, b, c) \ +- __extension__ \ +- ({ \ +- int32x4_t b_ = (b); \ +- int32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_s64(a, b, c) \ +- __extension__ \ +- ({ \ +- int64x2_t b_ = (b); \ +- int64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u8(a, b, c) \ +- __extension__ \ +- ({ \ +- uint8x16_t b_ = (b); \ +- uint8_t * a_ = (a); \ +- __asm__ ("st1 {%1.b}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u16(a, b, c) \ +- __extension__ \ +- ({ \ +- uint16x8_t b_ = (b); \ +- uint16_t * a_ = (a); \ +- __asm__ ("st1 {%1.h}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u32(a, b, c) \ +- __extension__ \ +- ({ \ +- uint32x4_t b_ = (b); \ +- uint32_t * a_ = (a); \ +- __asm__ ("st1 {%1.s}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +-#define vst1q_lane_u64(a, b, c) \ +- __extension__ \ +- ({ \ +- uint64x2_t b_ = (b); \ +- uint64_t * a_ = (a); \ +- __asm__ ("st1 {%1.d}[%2],[%0]" \ +- : \ +- : "r"(a_), "w"(b_), "i"(c) \ +- : "memory"); \ +- }) +- +- + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vtst_p8 (poly8x8_t a, poly8x8_t b) + { +@@ -10218,8 +9950,8 @@ __STRUCTN (float, 64, 4) + #undef __STRUCTN + + +-#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ +- mode, ptr_mode, funcsuffix, signedtype) \ ++#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ ++ qmode, ptr_mode, funcsuffix, signedtype) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ +@@ -10233,31 +9965,37 @@ vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[0], 0); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[1], 1); \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ + } + +-__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, ++__ST2_LANE_FUNC 
(float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +-__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, ++__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, + float64x2_t) +-__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) +-__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, ++__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, ++ int8x16_t) ++__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, + int16x8_t) +-__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) +-__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) +-__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) +-__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) +-__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) +-__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, ++__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, ++ int8x16_t) ++__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, ++ int16x8_t) ++__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, ++ int32x4_t) ++__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, ++ int64x2_t) ++__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, ++ int8x16_t) ++__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, + int16x8_t) +-__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, ++__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, + int32x4_t) +-__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, ++__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, + int64x2_t) + + #undef __ST2_LANE_FUNC +@@ -10286,8 +10024,8 @@ __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) + __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) + __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) + +-#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ +- mode, ptr_mode, funcsuffix, signedtype) \ ++#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ ++ qmode, ptr_mode, funcsuffix, signedtype) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ +@@ -10304,33 +10042,39 @@ vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[0], 0); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[1], 1); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[2], 2); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[2], 2); \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ + } + +-__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, ++__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +-__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, 
++__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, + float64x2_t) +-__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) +-__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, ++__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, ++ int8x16_t) ++__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, ++ int16x8_t) ++__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, ++ int8x16_t) ++__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) +-__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) +-__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) +-__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) +-__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) +-__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, ++__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, ++ int32x4_t) ++__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, ++ int64x2_t) ++__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, ++ int8x16_t) ++__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, + int16x8_t) +-__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, ++__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, + int32x4_t) +-__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, ++__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, + int64x2_t) + + #undef __ST3_LANE_FUNC +@@ -10359,8 +10103,8 @@ __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) + __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) + __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) + +-#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ +- mode, ptr_mode, funcsuffix, signedtype) \ ++#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ ++ qmode, ptr_mode, funcsuffix, signedtype) \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ +@@ -10380,35 +10124,41 @@ vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ + __temp.val[3] \ + = vcombine_##funcsuffix (__b.val[3], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[0], 0); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[1], 1); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[2], 2); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[3], 3); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[2], 2); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[3], 3); \ + __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ + } + +-__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, ++__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +-__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, 
v2df, df, f64, ++__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, + float64x2_t) +-__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) +-__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, ++__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, ++ int8x16_t) ++__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, + int16x8_t) +-__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) +-__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) +-__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) +-__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) +-__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) +-__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, ++__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, ++ int8x16_t) ++__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, ++__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, ++__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, ++ int64x2_t) ++__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, ++ int8x16_t) ++__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, ++ int16x8_t) ++__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, ++ int32x4_t) ++__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, + int64x2_t) + + #undef __ST4_LANE_FUNC +@@ -11668,25 +11418,25 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) + + /* vaes */ + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaeseq_u8 (uint8x16_t data, uint8x16_t key) + { + return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); + } + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaesdq_u8 (uint8x16_t data, uint8x16_t key) + { + return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); + } + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaesmcq_u8 (uint8x16_t data) + { + return __builtin_aarch64_crypto_aesmcv16qi_uu (data); + } + +-static __inline uint8x16_t ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaesimcq_u8 (uint8x16_t data) + { + return __builtin_aarch64_crypto_aesimcv16qi_uu (data); +@@ -11887,7 +11637,7 @@ vceq_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceq_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll}; ++ return (uint64x1_t) (__a == __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -11911,7 +11661,7 @@ vceq_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceq_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] == __b[0] ? 
-1ll : 0ll}; ++ return (__a == __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12047,7 +11797,7 @@ vceqz_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceqz_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12071,7 +11821,7 @@ vceqz_u32 (uint32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vceqz_u64 (uint64x1_t __a) + { +- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll}; ++ return (__a == __AARCH64_UINT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12201,7 +11951,7 @@ vcge_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcge_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll}; ++ return (uint64x1_t) (__a >= __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12225,7 +11975,7 @@ vcge_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcge_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll}; ++ return (__a >= __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12349,7 +12099,7 @@ vcgez_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgez_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a >= __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12443,7 +12193,7 @@ vcgt_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgt_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll); ++ return (uint64x1_t) (__a > __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12467,7 +12217,7 @@ vcgt_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgt_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll); ++ return (__a > __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12591,7 +12341,7 @@ vcgtz_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcgtz_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a > __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12685,7 +12435,7 @@ vcle_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcle_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] <= __b[0] ? 
-1ll : 0ll}; ++ return (uint64x1_t) (__a <= __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12709,7 +12459,7 @@ vcle_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcle_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll}; ++ return (__a <= __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12833,7 +12583,7 @@ vclez_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vclez_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -12927,7 +12677,7 @@ vclt_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vclt_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; ++ return (uint64x1_t) (__a < __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -12951,7 +12701,7 @@ vclt_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vclt_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll}; ++ return (__a < __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -13075,7 +12825,7 @@ vcltz_s32 (int32x2_t __a) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vcltz_s64 (int64x1_t __a) + { +- return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll}; ++ return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +@@ -17067,8 +16817,8 @@ vld4q_dup_f64 (const float64_t * __a) + + /* vld2_lane */ + +-#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, \ +- mode, ptrmode, funcsuffix, signedtype) \ ++#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ ++ qmode, ptrmode, funcsuffix, signedtype) \ + __extension__ static __inline intype __attribute__ ((__always_inline__)) \ + vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + { \ +@@ -17078,12 +16828,12 @@ vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ + __temp.val[1] = \ + vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[0], \ +- 0); \ +- __o = __builtin_aarch64_set_qregoi##mode (__o, \ +- (signedtype) __temp.val[1], \ +- 1); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[0], \ ++ 0); \ ++ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ ++ (signedtype) __temp.val[1], \ ++ 1); \ + __o = __builtin_aarch64_ld2_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \ +@@ -17091,29 +16841,29 @@ vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + return __b; \ + } + +-__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v4sf, ++__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +-__LD2_LANE_FUNC (float64x1x2_t, 
float64x1_t, float64x2x2_t, float64_t, v2df, ++__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df, + df, f64, float64x2_t) +-__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, ++__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +-__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v8hi, hi, ++__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +-__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v16qi, qi, s8, ++__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +-__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v8hi, hi, s16, ++__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v4si, si, s32, ++__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, v2di, di, s64, ++__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64, + int64x2_t) +-__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, ++__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +-__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v8hi, hi, ++__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +-__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v4si, si, ++__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +-__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, v2di, di, ++__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, + u64, int64x2_t) + + #undef __LD2_LANE_FUNC +@@ -17152,8 +16902,8 @@ __LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) + + /* vld3_lane */ + +-#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, \ +- mode, ptrmode, funcsuffix, signedtype) \ ++#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ ++ qmode, ptrmode, funcsuffix, signedtype) \ + __extension__ static __inline intype __attribute__ ((__always_inline__)) \ + vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + { \ +@@ -17165,15 +16915,15 @@ vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ + __temp.val[2] = \ + vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[0], \ +- 0); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[1], \ +- 1); \ +- __o = __builtin_aarch64_set_qregci##mode (__o, \ +- (signedtype) __temp.val[2], \ +- 2); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[0], \ ++ 0); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[1], \ ++ 1); \ ++ __o = __builtin_aarch64_set_qregci##qmode (__o, \ ++ (signedtype) __temp.val[2], \ ++ 2); \ + __o = __builtin_aarch64_ld3_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \ +@@ -17182,29 +16932,29 @@ vld3_lane_##funcsuffix (const ptrtype * __ptr, intype 
__b, const int __c) \ + return __b; \ + } + +-__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v4sf, ++__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +-__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, v2df, ++__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df, + df, f64, float64x2_t) +-__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, ++__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +-__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v8hi, hi, ++__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +-__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v16qi, qi, s8, ++__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +-__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v8hi, hi, s16, ++__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v4si, si, s32, ++__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, v2di, di, s64, ++__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64, + int64x2_t) +-__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, ++__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +-__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v8hi, hi, ++__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +-__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v4si, si, ++__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +-__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, v2di, di, ++__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, + u64, int64x2_t) + + #undef __LD3_LANE_FUNC +@@ -17245,8 +16995,8 @@ __LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) + + /* vld4_lane */ + +-#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, \ +- mode, ptrmode, funcsuffix, signedtype) \ ++#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ ++ qmode, ptrmode, funcsuffix, signedtype) \ + __extension__ static __inline intype __attribute__ ((__always_inline__)) \ + vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + { \ +@@ -17260,18 +17010,18 @@ vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ + __temp.val[3] = \ + vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[0], \ +- 0); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[1], \ +- 1); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[2], \ +- 2); \ +- __o = __builtin_aarch64_set_qregxi##mode (__o, \ +- (signedtype) __temp.val[3], \ +- 3); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[0], \ ++ 0); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[1], \ 
++ 1); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[2], \ ++ 2); \ ++ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ ++ (signedtype) __temp.val[3], \ ++ 3); \ + __o = __builtin_aarch64_ld4_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ +@@ -17283,29 +17033,29 @@ vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ + + /* vld4q_lane */ + +-__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v4sf, ++__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +-__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, v2df, ++__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, + df, f64, float64x2_t) +-__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, ++__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +-__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v8hi, hi, ++__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +-__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v16qi, qi, s8, ++__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +-__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, ++__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +-__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v4si, si, s32, ++__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +-__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, v2di, di, s64, ++__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, + int64x2_t) +-__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, ++__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +-__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v8hi, hi, ++__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +-__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v4si, si, ++__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +-__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, v2di, di, ++__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, + u64, int64x2_t) + + #undef __LD4_LANE_FUNC +@@ -21321,72 +21071,74 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) + + /* vsha1 */ + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); + } +-static __inline uint32x4_t ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); + } +-static __inline uint32x4_t ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha1pv4si_uuuu 
(hash_abcd, hash_e, wk); + } + +-static __inline uint32_t ++__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) + vsha1h_u32 (uint32_t hash_e) + { + return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) + { + return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) + { + return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) + { + return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) + { + return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); + } + +-static __inline uint32x4_t ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) + { + return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); + } + +-static __inline poly128_t ++__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) + vmull_p64 (poly64_t a, poly64_t b) + { + return + __builtin_aarch64_crypto_pmulldi_ppp (a, b); + } + +-static __inline poly128_t ++__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) + vmull_high_p64 (poly64x2_t a, poly64x2_t b) + { + return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); +@@ -22302,6 +22054,8 @@ vst1_u64 (uint64_t *a, uint64x1_t b) + *a = b[0]; + } + ++/* vst1q */ ++ + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1q_f32 (float32_t *a, float32x4_t b) + { +@@ -22314,8 +22068,6 @@ vst1q_f64 (float64_t *a, float64x2_t b) + __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); + } + +-/* vst1q */ +- + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1q_p8 (poly8_t *a, poly8x16_t b) + { +@@ -22382,6 +22134,154 @@ vst1q_u64 (uint64_t *a, uint64x2_t b) + (int64x2_t) b); + } + ++/* vst1_lane */ ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane) ++{ ++ *__a = 
__aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++/* vst1q_lane */ ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u16 (uint16_t *__a, uint16x8_t 
__b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane) ++{ + *__a = __aarch64_vget_lane_any (__b, __lane); +} + -+/* vst1q_lane */ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ + /* vstn */ + + __extension__ static __inline void +@@ -23887,7 +23787,7 @@ vtst_s32 (int32x2_t __a, int32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vtst_s64 (int64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll}; ++ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -23911,7 +23811,7 @@ vtst_u32 (uint32x2_t __a, uint32x2_t __b) + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vtst_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll}; ++ return ((__a & __b) != __AARCH64_UINT64_C (0)); + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +--- a/src/gcc/config/aarch64/atomics.md ++++ b/src/gcc/config/aarch64/atomics.md +@@ -26,9 +26,28 @@ + UNSPECV_STL ; Represent an atomic store or store-release. + UNSPECV_ATOMIC_CMPSW ; Represent an atomic compare swap. + UNSPECV_ATOMIC_EXCHG ; Represent an atomic exchange. ++ UNSPECV_ATOMIC_CAS ; Represent an atomic CAS. ++ UNSPECV_ATOMIC_SWP ; Represent an atomic SWP. + UNSPECV_ATOMIC_OP ; Represent an atomic operation. ++ UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation ++ UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or ++ UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic ++ UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor ++ UNSPECV_ATOMIC_LDOP_PLUS ; Represent an atomic load-add + ]) + ++;; Iterators for load-operate instructions. ++ ++(define_int_iterator ATOMIC_LDOP ++ [UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC ++ UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS]) ++ ++(define_int_attr atomic_ldop ++ [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr") ++ (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) ++ ++;; Instruction patterns. 
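The hunks that follow wire the new ARMv8.1 LSE instructions (SWP, CAS and the LD<op> load-operate family) into the existing expanders: each atomic_* expander emits the *_lse pattern only when TARGET_LSE holds and keeps the load-/store-exclusive sequence as the fallback, and the memory-model operand selects the instruction suffix (none for relaxed, "a" for acquire/consume, "l" for release, "al" otherwise). A minimal C sketch of the builtins these patterns serve, assuming a toolchain carrying this patch and -march=armv8.1-a; the instruction named in each comment is the expected selection under that suffix mapping, not a guarantee:

#include <stdint.h>

uint64_t
swap_relaxed (uint64_t *p, uint64_t v)
{
  /* atomic_exchange expander; relaxed model -> plain SWP.  */
  return __atomic_exchange_n (p, v, __ATOMIC_RELAXED);
}

uint32_t
fetch_or_acquire (uint32_t *p, uint32_t mask)
{
  /* atomic_fetch_<op> expander; IOR maps to LDSET, acquire -> LDSETA.  */
  return __atomic_fetch_or (p, mask, __ATOMIC_ACQUIRE);
}

int
cas_seq_cst (uint64_t *p, uint64_t *expected, uint64_t desired)
{
  /* compare-and-swap expander; seq_cst -> CASAL when TARGET_LSE.  */
  return __atomic_compare_exchange_n (p, expected, desired, 0,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}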
++ + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool out + (match_operand:ALLI 1 "register_operand" "") ;; val out +@@ -45,10 +64,10 @@ + } + ) + +-(define_insn_and_split "atomic_compare_and_swap_1" ++(define_insn_and_split "aarch64_compare_and_swap" + [(set (reg:CC CC_REGNUM) ;; bool out + (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) +- (set (match_operand:SI 0 "register_operand" "=&r") ;; val out ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; val out + (zero_extend:SI + (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory + (set (match_dup 1) +@@ -57,7 +76,7 @@ + (match_operand:SHORT 3 "register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s +- (match_operand:SI 6 "const_int_operand")] ;; mod_f ++ (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW)) + (clobber (match_scratch:SI 7 "=&r"))] + "" +@@ -70,17 +89,17 @@ + } + ) + +-(define_insn_and_split "atomic_compare_and_swap_1" ++(define_insn_and_split "aarch64_compare_and_swap" + [(set (reg:CC CC_REGNUM) ;; bool out + (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) + (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out +- (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory ++ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:GPI + [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect + (match_operand:GPI 3 "register_operand" "r") ;; desired +- (match_operand:SI 4 "const_int_operand") ;; is_weak +- (match_operand:SI 5 "const_int_operand") ;; mod_s ++ (match_operand:SI 4 "const_int_operand") ;; is_weak ++ (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW)) + (clobber (match_scratch:SI 7 "=&r"))] +@@ -94,7 +113,79 @@ + } + ) + +-(define_insn_and_split "atomic_exchange" ++(define_insn_and_split "aarch64_compare_and_swap_lse" ++ [(set (reg:CC CC_REGNUM) ;; bool out ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; val out ++ (zero_extend:SI ++ (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected ++ (match_operand:SHORT 3 "register_operand" "r") ;; desired ++ (match_operand:SI 4 "const_int_operand") ;; is_weak ++ (match_operand:SI 5 "const_int_operand") ;; mod_s ++ (match_operand:SI 6 "const_int_operand")] ;; mod_f ++ UNSPECV_ATOMIC_CMPSW))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_cas (operands[0], operands[1], ++ operands[2], operands[3], ++ operands[5]); ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_compare_and_swap_lse" ++ [(set (reg:CC CC_REGNUM) ;; bool out ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) ++ (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out ++ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory ++ (set (match_dup 1) ++ (unspec_volatile:GPI ++ [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect ++ (match_operand:GPI 3 "register_operand" "r") ;; desired ++ (match_operand:SI 4 "const_int_operand") ;; is_weak ++ (match_operand:SI 5 "const_int_operand") ;; mod_s ++ (match_operand:SI 6 "const_int_operand")] ;; mod_f ++ UNSPECV_ATOMIC_CMPSW))] ++ 
"TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_cas (operands[0], operands[1], ++ operands[2], operands[3], ++ operands[5]); ++ DONE; ++ } ++) ++ ++(define_expand "atomic_exchange" ++ [(match_operand:ALLI 0 "register_operand" "") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") ++ (match_operand:ALLI 2 "register_operand" "") ++ (match_operand:SI 3 "const_int_operand" "")] ++ "" ++ { ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ ++ /* Use an atomic SWP when available. */ ++ if (TARGET_LSE) ++ gen = gen_aarch64_atomic_exchange_lse; ++ else ++ gen = gen_aarch64_atomic_exchange; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); ++ ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_atomic_exchange" + [(set (match_operand:ALLI 0 "register_operand" "=&r") ;; output + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) +@@ -110,28 +201,87 @@ + [(const_int 0)] + { + aarch64_split_atomic_op (SET, operands[0], NULL, operands[1], +- operands[2], operands[3], operands[4]); ++ operands[2], operands[3], operands[4]); ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_atomic_exchange_lse" ++ [(set (match_operand:ALLI 0 "register_operand" "=&r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_operand:ALLI 2 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "")] ++ UNSPECV_ATOMIC_EXCHG))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1], ++ operands[2], operands[3]); ++ DONE; ++ } ++) ++ ++(define_expand "atomic_" ++ [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "") ++ (atomic_op:ALLI ++ (match_operand:ALLI 1 "" "") ++ (match_operand:SI 2 "const_int_operand"))] ++ "" ++ { ++ rtx (*gen) (rtx, rtx, rtx); ++ ++ /* Use an atomic load-operate instruction when possible. 
*/ ++ if (aarch64_atomic_ldop_supported_p ()) ++ gen = gen_aarch64_atomic__lse; ++ else ++ gen = gen_aarch64_atomic_; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2])); ++ + DONE; + } + ) + +-(define_insn_and_split "atomic_" ++(define_insn_and_split "aarch64_atomic_" ++ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") ++ (unspec_volatile:ALLI ++ [(atomic_op:ALLI (match_dup 0) ++ (match_operand:ALLI 1 "" "r")) ++ (match_operand:SI 2 "const_int_operand")] ++ UNSPECV_ATOMIC_OP)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:ALLI 3 "=&r")) ++ (clobber (match_scratch:SI 4 "=&r"))] ++ "" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_split_atomic_op (, NULL, operands[3], operands[0], ++ operands[1], operands[2], operands[4]); ++ DONE; ++ } ++) ++ ++(define_insn_and_split "aarch64_atomic__lse" + [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") + (unspec_volatile:ALLI + [(atomic_op:ALLI (match_dup 0) + (match_operand:ALLI 1 "" "r")) +- (match_operand:SI 2 "const_int_operand")] ;; model ++ (match_operand:SI 2 "const_int_operand")] + UNSPECV_ATOMIC_OP)) +- (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:ALLI 3 "=&r")) +- (clobber (match_scratch:SI 4 "=&r"))] +- "" ++ (clobber (match_scratch:ALLI 3 "=&r"))] ++ "TARGET_LSE" + "#" + "&& reload_completed" + [(const_int 0)] + { +- aarch64_split_atomic_op (, NULL, operands[3], operands[0], +- operands[1], operands[2], operands[4]); ++ aarch64_gen_atomic_ldop (, operands[3], NULL, operands[0], ++ operands[1], operands[2]); + DONE; + } + ) +@@ -158,7 +308,30 @@ + } + ) + +-(define_insn_and_split "atomic_fetch_" ++;; Load-operate-store, returning the updated memory data. ++ ++(define_expand "atomic_fetch_" ++ [(match_operand:ALLI 0 "register_operand" "") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") ++ (atomic_op:ALLI ++ (match_operand:ALLI 2 "" "") ++ (match_operand:SI 3 "const_int_operand"))] ++ "" ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ ++ /* Use an atomic load-operate instruction when possible. */ ++ if (aarch64_atomic_ldop_supported_p ()) ++ gen = gen_aarch64_atomic_fetch__lse; ++ else ++ gen = gen_aarch64_atomic_fetch_; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); ++ ++ DONE; ++}) ++ ++(define_insn_and_split "aarch64_atomic_fetch_" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) + (set (match_dup 1) +@@ -181,6 +354,26 @@ + } + ) + ++(define_insn_and_split "aarch64_atomic_fetch__lse" ++ [(set (match_operand:ALLI 0 "register_operand" "=&r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(atomic_op:ALLI (match_dup 1) ++ (match_operand:ALLI 2 "" "r")) ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPECV_ATOMIC_LDOP))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_ldop (, operands[0], NULL, operands[1], ++ operands[2], operands[3]); ++ DONE; ++ } ++) ++ + (define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) +@@ -205,7 +398,31 @@ + } + ) + +-(define_insn_and_split "atomic__fetch" ++;; Load-operate-store, returning the original memory data. 
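Both load-operate expander families are reworked the same way: atomic_fetch_<op> (behind __atomic_fetch_add and friends, which return the value the memory held before the operation) above, and atomic_<op>_fetch (behind __atomic_add_fetch and friends, which return the updated value) below. Each now asks aarch64_atomic_ldop_supported_p whether the operation has a single-instruction LD<op> form and, if so, emits the _lse pattern through aarch64_gen_atomic_ldop; otherwise it falls back to the generic load-/store-exclusive split via aarch64_split_atomic_op. A short source-level sketch of the contrast between the two builtin forms (helper names are only for illustration):

#include <stdint.h>

static inline uint32_t
old_value (uint32_t *p)      /* pre-increment value: atomic_fetch_add.  */
{
  return __atomic_fetch_add (p, 1, __ATOMIC_RELAXED);
}

static inline uint32_t
new_value (uint32_t *p)      /* post-increment value: atomic_add_fetch.  */
{
  return __atomic_add_fetch (p, 1, __ATOMIC_RELAXED);
}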
++ ++(define_expand "atomic__fetch" ++ [(match_operand:ALLI 0 "register_operand" "") ++ (atomic_op:ALLI ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") ++ (match_operand:ALLI 2 "" "")) ++ (match_operand:SI 3 "const_int_operand")] ++ "" ++{ ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ rtx value = operands[2]; ++ ++ /* Use an atomic load-operate instruction when possible. */ ++ if (aarch64_atomic_ldop_supported_p ()) ++ gen = gen_aarch64_atomic__fetch_lse; ++ else ++ gen = gen_aarch64_atomic__fetch; ++ ++ emit_insn (gen (operands[0], operands[1], value, operands[3])); ++ ++ DONE; ++}) ++ ++(define_insn_and_split "aarch64_atomic__fetch" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (atomic_op:ALLI + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") +@@ -228,6 +445,29 @@ + } + ) + ++(define_insn_and_split "aarch64_atomic__fetch_lse" ++ [(set (match_operand:ALLI 0 "register_operand" "=&r") ++ (atomic_op:ALLI ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") ++ (match_operand:ALLI 2 "" "r"))) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_dup 1) ++ (match_dup 2) ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPECV_ATOMIC_LDOP)) ++ (clobber (match_scratch:ALLI 4 "=r"))] ++ "TARGET_LSE" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ { ++ aarch64_gen_atomic_ldop (, operands[4], operands[0], operands[1], ++ operands[2], operands[3]); ++ DONE; ++ } ++) ++ + (define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (not:ALLI +@@ -370,3 +610,100 @@ + return "dmb\\tish"; + } + ) ++ ++;; ARMv8.1 LSE instructions. ++ ++;; Atomic swap with memory. ++(define_insn "aarch64_atomic_swp" ++ [(set (match_operand:ALLI 0 "register_operand" "+&r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_operand:ALLI 2 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "")] ++ UNSPECV_ATOMIC_SWP))] ++ "TARGET_LSE && reload_completed" ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "swp\t%2, %0, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "swpa\t%2, %0, %1"; ++ else if (is_mm_release (model)) ++ return "swpl\t%2, %0, %1"; ++ else ++ return "swpal\t%2, %0, %1"; ++ }) ++ ++;; Atomic compare-and-swap: HI and smaller modes. ++ ++(define_insn "aarch64_atomic_cas" ++ [(set (match_operand:SI 0 "register_operand" "+&r") ;; out ++ (zero_extend:SI ++ (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory. ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(match_dup 0) ++ (match_operand:SHORT 2 "register_operand" "r") ;; value. ++ (match_operand:SI 3 "const_int_operand" "")] ;; model. ++ UNSPECV_ATOMIC_CAS))] ++ "TARGET_LSE && reload_completed" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "cas\t%0, %2, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "casa\t%0, %2, %1"; ++ else if (is_mm_release (model)) ++ return "casl\t%0, %2, %1"; ++ else ++ return "casal\t%0, %2, %1"; ++}) ++ ++;; Atomic compare-and-swap: SI and larger modes. ++ ++(define_insn "aarch64_atomic_cas" ++ [(set (match_operand:GPI 0 "register_operand" "+&r") ;; out ++ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory. ++ (set (match_dup 1) ++ (unspec_volatile:GPI ++ [(match_dup 0) ++ (match_operand:GPI 2 "register_operand" "r") ;; value. 
++ (match_operand:SI 3 "const_int_operand" "")] ;; model. ++ UNSPECV_ATOMIC_CAS))] ++ "TARGET_LSE && reload_completed" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "cas\t%0, %2, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "casa\t%0, %2, %1"; ++ else if (is_mm_release (model)) ++ return "casl\t%0, %2, %1"; ++ else ++ return "casal\t%0, %2, %1"; ++}) ++ ++;; Atomic load-op: Load data, operate, store result, keep data. ++ ++(define_insn "aarch64_atomic_load" ++ [(set (match_operand:ALLI 0 "register_operand" "=r") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ++ (set (match_dup 1) ++ (unspec_volatile:ALLI ++ [(match_dup 1) ++ (match_operand:ALLI 2 "register_operand") ++ (match_operand:SI 3 "const_int_operand")] ++ ATOMIC_LDOP))] ++ "TARGET_LSE && reload_completed" ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "ld\t%2, %0, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "lda\t%2, %0, %1"; ++ else if (is_mm_release (model)) ++ return "ldl\t%2, %0, %1"; ++ else ++ return "ldal\t%2, %0, %1"; ++ }) +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/cortex-a57-fma-steering.c +@@ -0,0 +1,1099 @@ ++/* FMA steering optimization pass for Cortex-A57. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "regs.h" ++#include "insn-config.h" ++#include "hard-reg-set.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfganal.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "insn-attr.h" ++#include "machmode.h" ++#include "recog.h" ++#include "output.h" ++#include "vec.h" ++#include "hash-map.h" ++#include "bitmap.h" ++#include "obstack.h" ++#include "df.h" ++#include "target.h" ++#include "rtl.h" ++#include "context.h" ++#include "tree-pass.h" ++#include "regrename.h" ++#include "cortex-a57-fma-steering.h" ++#include "aarch64-protos.h" ++ ++#include ++ ++/* For better performance, the destination of FMADD/FMSUB instructions should ++ have the same parity as their accumulator register if the accumulator ++ contains the result of a previous FMUL or FMADD/FMSUB instruction if ++ targetting Cortex-A57 processors. Performance is also increased by ++ otherwise keeping a good balance in the parity of the destination register ++ of FMUL or FMADD/FMSUB. ++ ++ This pass ensure that registers are renamed so that these conditions hold. ++ We reuse the existing register renaming facility from regrename.c to build ++ dependency chains and expose candidate registers for renaming. ++ ++ ++ The algorithm has three steps: ++ ++ First, the functions of the register renaming pass are called. 
These ++ analyze the instructions and produce a list of def/use chains of ++ instructions. ++ ++ Next, this information is used to build trees of multiply and ++ multiply-accumulate instructions. The roots of these trees are any ++ multiply, or any multiply-accumulate whose accumulator is not dependent on ++ a multiply or multiply-accumulate instruction. A child is added to the ++ tree where a dependency chain exists between the result of the parent ++ instruction and the accumulator operand of the child, as in the diagram ++ below: ++ ++ fmul s2, s0, s1 ++ / \ ++ fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2 ++ | ++ fmadd s3, s1, s1, s0 ++ ++ Trees made of a single instruction are permitted. ++ ++ Finally, renaming is performed. The parity of the destination register at ++ the root of a tree is checked against the current balance of multiply and ++ multiply-accumulate on each pipeline. If necessary, the root of a tree is ++ renamed, in which case the rest of the tree is then renamed to keep the same ++ parity in the destination registers of all instructions in the tree. */ ++ ++ ++ ++/* Forward declarations. */ ++class fma_node; ++class fma_root_node; ++class func_fma_steering; ++ ++/* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent ++ FMADD/FMSUB instructions form a graph. This is because alternatives can ++ make a register be set by several FMUL or FMADD/FMSUB instructions in ++ different basic blocks and because of loops. For ease of browsing, the ++ connected components of this graph are broken up into forests of trees. ++ Forests are represented by fma_forest objects, contained in the fma_forests ++ list. Using a separate object for the forests allows for a better use of ++ memory as there is some information that is global to each forest, such as ++ the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each ++ floating-point execution pipelines. */ ++ ++class fma_forest ++{ ++public: ++ fma_forest (func_fma_steering *, fma_root_node *, int); ++ ~fma_forest (); ++ ++ int get_id (); ++ std::list *get_roots (); ++ func_fma_steering *get_globals (); ++ int get_target_parity (); ++ void fma_node_created (fma_node *); ++ void merge_forest (fma_forest *); ++ void dump_info (); ++ void dispatch (); ++ ++private: ++ /* The list of roots that form this forest. */ ++ std::list *m_roots; ++ ++ /* Target parity the destination register of all FMUL and FMADD/FMSUB ++ instructions in this forest should have. */ ++ int m_target_parity; ++ ++ /* Link to the instance of func_fma_steering holding data related to the ++ FMA steering of the current function (cfun). */ ++ func_fma_steering *m_globals; ++ ++ /* Identifier for the forest (used for dumps). */ ++ int m_id; ++ ++ /* Total number of nodes in the forest (for statistics). */ ++ int m_nb_nodes; ++}; ++ ++class fma_node ++{ ++public: ++ fma_node (fma_node *parent, du_chain *chain); ++ ~fma_node (); ++ ++ bool root_p (); ++ fma_forest *get_forest (); ++ std::list *get_children (); ++ rtx_insn *get_insn (); ++ void add_child (fma_node *); ++ int get_parity (); ++ void set_head (du_head *); ++ void rename (fma_forest *); ++ void dump_info (fma_forest *); ++ ++protected: ++ /* Root node that lead to this node. */ ++ fma_root_node *m_root; ++ ++ /* The parent node of this node. If the node belong to a chain with several ++ parent nodes, the first one encountered in a depth-first search is chosen ++ as canonical parent. */ ++ fma_node *m_parent; ++ ++ /* The list of child nodes. 
If a chain contains several parent nodes, one is ++ chosen as canonical parent and the others will have no children. */ ++ std::list *m_children; ++ ++ /* The associated DU_HEAD chain that the insn represented by this object ++ is (one of) the root of. When a chain contains several roots, the non ++ canonical ones have this field set to NULL. */ ++ struct du_head *m_head; ++ ++ /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */ ++ rtx_insn *m_insn; ++}; ++ ++class fma_root_node : public fma_node ++{ ++public: ++ fma_root_node (func_fma_steering *, du_chain *, int); ++ ++ fma_forest *get_forest (); ++ void set_forest (fma_forest *); ++ void dump_info (fma_forest *); ++ ++private: ++ /* The forest this node belonged to when it was created. */ ++ fma_forest *m_forest; ++}; ++ ++/* Class holding all data and methods relative to the FMA steering of a given ++ function. The FMA steering pass could then run in parallel for different ++ functions. */ ++ ++class func_fma_steering ++{ ++public: ++ func_fma_steering (); ++ ~func_fma_steering (); ++ ++ int get_fpu_balance (); ++ void remove_forest (fma_forest *); ++ bool put_node (fma_node *); ++ void update_balance (int); ++ fma_node *get_fma_node (rtx_insn *); ++ void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p); ++ void execute_fma_steering (); ++ ++private: ++ void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *), ++ void (*) (fma_forest *, fma_node *), bool); ++ void analyze (); ++ void rename_fma_trees (); ++ ++ /* Mapping between FMUL or FMADD/FMSUB instructions and the associated ++ fma_node object. Used when analyzing an instruction that is a root of ++ a chain to find if such an object was created because this instruction ++ is also a use in another chain. */ ++ hash_map *m_insn_fma_head_map; ++ ++ /* A list of all the forests in a given function. */ ++ std::list m_fma_forests; ++ ++ /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU ++ pipelines: ++ < 0: more instruction dispatched to the first pipeline ++ == 0: perfect balance ++ > 0: more instruction dispatched to the second pipeline. */ ++ int m_fpu_balance; ++ ++ /* Identifier for the next forest created. */ ++ int m_next_forest_id; ++}; ++ ++/* Rename the register HEAD->regno in all the insns in the chain HEAD to any ++ register not in the set UNAVAILABLE. Adapted from rename_chains in ++ regrename.c. */ ++ ++static bool ++rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) ++{ ++ int best_new_reg; ++ int n_uses = 0; ++ struct du_chain *tmp; ++ int reg = head->regno; ++ enum reg_class super_class = NO_REGS; ++ ++ if (head->cannot_rename) ++ return false; ++ ++ if (fixed_regs[reg] || global_regs[reg] ++ || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM)) ++ return false; ++ ++ /* Iterate over elements in the chain in order to: ++ 1. Count number of uses, and narrow the set of registers we can ++ use for renaming. ++ 2. Compute the superunion of register classes in this chain. 
*/ ++ for (tmp = head->first; tmp; tmp = tmp->next_use) ++ { ++ if (DEBUG_INSN_P (tmp->insn)) ++ continue; ++ n_uses++; ++ IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); ++ super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; ++ } ++ ++ if (n_uses < 1) ++ return false; ++ ++ best_new_reg = find_rename_reg (head, super_class, unavailable, reg, ++ false); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Register %s in insn %d", reg_names[reg], ++ INSN_UID (head->first->insn)); ++ if (head->need_caller_save_reg) ++ fprintf (dump_file, " crosses a call"); ++ } ++ ++ if (best_new_reg == reg) ++ { ++ if (dump_file) ++ fprintf (dump_file, "; no available better choice\n"); ++ return false; ++ } ++ ++ if (regrename_do_replace (head, best_new_reg)) ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); ++ df_set_regs_ever_live (best_new_reg, true); ++ } ++ else ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renaming as %s failed\n", ++ reg_names[best_new_reg]); ++ return false; ++ } ++ return true; ++} ++ ++/* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */ ++ ++static bool ++is_fmac_op (enum attr_type t) ++{ ++ return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S); ++} ++ ++/* Return whether T is the attribute of a FMUL instruction. */ ++ ++static bool ++is_fmul_op (enum attr_type t) ++{ ++ return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S); ++} ++ ++/* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB ++ instruction. */ ++ ++static bool ++is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok) ++{ ++ enum attr_type t; ++ ++ if (!NONDEBUG_INSN_P (insn)) ++ return false; ++ ++ if (recog_memoized (insn) < 0) ++ return false; ++ ++ /* Only consider chain(s) this instruction is a root of if this is an FMUL or ++ FMADD/FMSUB instruction. This allows to avoid browsing chains of all ++ instructions for FMUL or FMADD/FMSUB in them. */ ++ t = get_attr_type (insn); ++ return is_fmac_op (t) || (fmul_ok && is_fmul_op (t)); ++} ++ ++ ++/* ++ * Class fma_forest method definitions. ++ */ ++ ++fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root, ++ int id) ++{ ++ memset (this, 0, sizeof (*this)); ++ this->m_globals = fma_steer; ++ this->m_roots = new std::list; ++ this->m_roots->push_back (fma_root); ++ this->m_id = id; ++} ++ ++fma_forest::~fma_forest () ++{ ++ delete this->m_roots; ++} ++ ++int ++fma_forest::get_id () ++{ ++ return this->m_id; ++} ++ ++std::list * ++fma_forest::get_roots () ++{ ++ return this->m_roots; ++} ++ ++func_fma_steering * ++fma_forest::get_globals () ++{ ++ return this->m_globals; ++} ++ ++int ++fma_forest::get_target_parity () ++{ ++ return this->m_target_parity; ++} ++ ++/* Act on the creation of NODE by updating statistics in FOREST and adding an ++ entry for it in the func_fma_steering hashmap. */ ++ ++void fma_forest::fma_node_created (fma_node *node) ++{ ++ bool created = !this->m_globals->put_node (node); ++ ++ gcc_assert (created); ++ this->m_nb_nodes++; ++} ++ ++/* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical ++ fma_forest object to represent both. */ ++ ++void ++fma_forest::merge_forest (fma_forest *other_forest) ++{ ++ std::list *other_roots; ++ std::list::iterator other_root_iter; ++ ++ if (this == other_forest) ++ return; ++ ++ other_roots = other_forest->m_roots; ++ ++ /* Update root nodes' pointer to forest. 
*/ ++ for (other_root_iter = other_roots->begin (); ++ other_root_iter != other_roots->end (); other_root_iter++) ++ (*other_root_iter)->set_forest (this); ++ ++ /* Remove other_forest from the list of forests and move its tree roots in ++ the list of tree roots of ref_forest. */ ++ this->m_globals->remove_forest (other_forest); ++ this->m_roots->splice (this->m_roots->begin (), *other_roots); ++ delete other_forest; ++ ++ this->m_nb_nodes += other_forest->m_nb_nodes; ++} ++ ++/* Dump information about the forest FOREST. */ ++ ++void ++fma_forest::dump_info () ++{ ++ gcc_assert (dump_file); ++ ++ fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id, ++ this->m_nb_nodes); ++} ++ ++/* Wrapper around fma_forest::dump_info for use as parameter of function ++ pointer type in func_fma_steering::dfs. */ ++ ++static void ++dump_forest_info (fma_forest *forest) ++{ ++ forest->dump_info (); ++} ++ ++/* Dispatch forest to the least utilized pipeline. */ ++ ++void ++fma_forest::dispatch () ++{ ++ this->m_target_parity = this->m_roots->front ()->get_parity (); ++ int fpu_balance = this->m_globals->get_fpu_balance (); ++ if (fpu_balance != 0) ++ this->m_target_parity = (fpu_balance < 0); ++ ++ if (dump_file) ++ fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id, ++ this->m_target_parity ? "odd" : "even"); ++} ++ ++/* Wrapper around fma_forest::dispatch for use as parameter of function pointer ++ type in func_fma_steering::dfs. */ ++ ++static void ++dispatch_forest (fma_forest *forest) ++{ ++ forest->dispatch (); ++} ++ ++fma_node::fma_node (fma_node *parent, du_chain *chain) ++{ ++ memset (this, 0, sizeof (*this)); ++ this->m_parent = parent; ++ this->m_children = new std::list; ++ this->m_insn = chain->insn; ++ /* root_p () cannot be used to check for root before root is set. */ ++ if (this->m_parent == this) ++ this->m_root = static_cast (parent); ++ else ++ { ++ this->m_root = parent->m_root; ++ this->get_forest ()->fma_node_created (this); ++ } ++} ++ ++fma_node::~fma_node () ++{ ++ delete this->m_children; ++} ++ ++std::list * ++fma_node::get_children () ++{ ++ return this->m_children; ++} ++ ++rtx_insn * ++fma_node::get_insn () ++{ ++ return this->m_insn; ++} ++ ++void ++fma_node::set_head (du_head *head) ++{ ++ gcc_assert (!this->m_head); ++ this->m_head = head; ++} ++ ++/* Add a child to this node in the list of children. */ ++ ++void ++fma_node::add_child (fma_node *child) ++{ ++ this->m_children->push_back (child); ++} ++ ++/* Return the parity of the destination register of the instruction represented ++ by this node. */ ++ ++int ++fma_node::get_parity () ++{ ++ return this->m_head->regno % 2; ++} ++ ++/* Get the actual forest associated with a non root node as the one the node ++ points to might have been merged into another one. In that case the pointer ++ in the root nodes are updated so we return the forest pointer of a root node ++ pointed to by the initial forest. Despite being a oneliner, this method is ++ defined here as it references a method from fma_root_node. */ ++ ++fma_forest * ++fma_node::get_forest () ++{ ++ return this->m_root->get_forest (); ++} ++ ++/* Return whether a node is a root node. */ ++ ++bool ++fma_node::root_p () ++{ ++ return this->m_root == this; ++} ++ ++/* Dump information about the children of node FMA_NODE in forest FOREST. 
*/ ++ ++void ++fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest) ++{ ++ struct du_chain *chain; ++ std::list::iterator fma_child; ++ ++ gcc_assert (dump_file); ++ ++ if (this->get_children ()->empty ()) ++ return; ++ ++ fprintf (dump_file, "Instruction(s)"); ++ for (chain = this->m_head->first; chain; chain = chain->next_use) ++ { ++ if (!is_fmul_fmac_insn (chain->insn, true)) ++ continue; ++ ++ if (chain->loc != &SET_DEST (PATTERN (chain->insn))) ++ continue; ++ ++ fprintf (dump_file, " %d", INSN_UID (chain->insn)); ++ } ++ ++ fprintf (dump_file, " is(are) accumulator dependency of instructions"); ++ for (fma_child = this->get_children ()->begin (); ++ fma_child != this->get_children ()->end (); fma_child++) ++ fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn)); ++ fprintf (dump_file, "\n"); ++} ++ ++/* Wrapper around fma_node::dump_info for use as parameter of function pointer ++ type in func_fma_steering::dfs. */ ++ ++static void ++dump_tree_node_info (fma_forest *forest, fma_node *node) ++{ ++ node->dump_info (forest); ++} ++ ++/* Rename the destination register of a single FMUL or FMADD/FMSUB instruction ++ represented by FMA_NODE to a register that respect the target parity for ++ FOREST or with same parity of the instruction represented by its parent node ++ if it has one. */ ++ ++void ++fma_node::rename (fma_forest *forest) ++{ ++ int cur_parity, target_parity; ++ ++ /* This is alternate root of a chain and thus has no children. It will be ++ renamed when processing the canonical root for that chain. */ ++ if (!this->m_head) ++ return; ++ ++ target_parity = forest->get_target_parity (); ++ if (this->m_parent) ++ target_parity = this->m_parent->get_parity (); ++ cur_parity = this->get_parity (); ++ ++ /* Rename if parity differs. */ ++ if (cur_parity != target_parity) ++ { ++ rtx_insn *insn = this->m_insn; ++ HARD_REG_SET unavailable; ++ enum machine_mode mode; ++ int reg; ++ ++ if (dump_file) ++ { ++ unsigned cur_dest_reg = this->m_head->regno; ++ ++ fprintf (dump_file, "FMA or FMUL at insn %d but destination " ++ "register (%s) has different parity from expected to " ++ "maximize FPU pipeline utilization\n", INSN_UID (insn), ++ reg_names[cur_dest_reg]); ++ } ++ ++ /* Don't clobber traceback for noreturn functions. */ ++ CLEAR_HARD_REG_SET (unavailable); ++ if (frame_pointer_needed) ++ { ++ add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM); ++ add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM); ++ } ++ ++ /* Exclude registers with wrong parity. */ ++ mode = GET_MODE (SET_DEST (PATTERN (insn))); ++ for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2) ++ add_to_hard_reg_set (&unavailable, mode, reg); ++ ++ if (!rename_single_chain (this->m_head, &unavailable)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Destination register of insn %d could not be " ++ "renamed. Dependent FMA insns will use this parity from " ++ "there on.\n", INSN_UID (insn)); ++ } ++ else ++ cur_parity = target_parity; ++ } ++ ++ forest->get_globals ()->update_balance (cur_parity); ++} ++ ++/* Wrapper around fma_node::dump_info for use as parameter of function pointer ++ type in func_fma_steering::dfs. 
*/ ++ ++static void ++rename_fma_node (fma_forest *forest, fma_node *node) ++{ ++ node->rename (forest); ++} ++ ++fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain, ++ int id) : fma_node (this, chain) ++{ ++ this->m_forest = new fma_forest (globals, this, id); ++ this->m_forest->fma_node_created (this); ++} ++ ++fma_forest * ++fma_root_node::get_forest () ++{ ++ return this->m_forest; ++} ++ ++void ++fma_root_node::set_forest (fma_forest *ref_forest) ++{ ++ this->m_forest = ref_forest; ++} ++ ++/* Dump information about the roots of forest FOREST. */ ++ ++void ++fma_root_node::dump_info (fma_forest *forest) ++{ ++ gcc_assert (dump_file); ++ ++ if (this == forest->get_roots ()->front ()) ++ fprintf (dump_file, "Instruction(s) at root of forest #%d:", ++ forest->get_id ()); ++ fprintf (dump_file, " %d", INSN_UID (this->m_insn)); ++ if (this == forest->get_roots ()->back ()) ++ fprintf (dump_file, "\n"); ++} ++ ++/* Wrapper around fma_root_node::dump_info for use as parameter of function ++ pointer type in func_fma_steering::dfs. */ ++ ++static void ++dump_tree_root_info (fma_forest *forest, fma_root_node *node) ++{ ++ node->dump_info (forest); ++} ++ ++func_fma_steering::func_fma_steering () : m_fpu_balance (0) ++{ ++ this->m_insn_fma_head_map = new hash_map; ++ this->m_fma_forests.clear (); ++ this->m_next_forest_id = 0; ++} ++ ++func_fma_steering::~func_fma_steering () ++{ ++ delete this->m_insn_fma_head_map; ++} ++ ++int ++func_fma_steering::get_fpu_balance () ++{ ++ return this->m_fpu_balance; ++} ++ ++void ++func_fma_steering::remove_forest (fma_forest *forest) ++{ ++ this->m_fma_forests.remove (forest); ++} ++ ++/* Memorize the mapping of this instruction to its fma_node object and return ++ whether such a mapping existed. */ ++ ++bool ++func_fma_steering::put_node (fma_node *node) ++{ ++ return this->m_insn_fma_head_map->put (node->get_insn (), node); ++} ++ ++/* Update the current balance considering a node with the given PARITY. */ ++ ++void ++func_fma_steering::update_balance (int parity) ++{ ++ this->m_fpu_balance = parity ? this->m_fpu_balance + 1 ++ : this->m_fpu_balance - 1; ++} ++ ++/* Return whether an fma_node object exists for instruction INSN and, if not, ++ allocate one in *RET. */ ++ ++fma_node * ++func_fma_steering::get_fma_node (rtx_insn *insn) ++{ ++ fma_node **fma_slot; ++ ++ fma_slot = this->m_insn_fma_head_map->get (insn); ++ if (fma_slot) ++ return *fma_slot; ++ return NULL; ++} ++ ++/* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB ++ instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all ++ part of FOREST. For the children, the associated head is left untouched ++ (and thus null) as this function will be called again when considering the ++ chain where they are def. For the parent, the chain is given in HEAD. */ ++ ++void ++func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest, ++ du_chain *chain, du_head_p head) ++{ ++ fma_forest *forest; ++ fma_node *node = this->get_fma_node (chain->insn); ++ ++ /* This is a root node. */ ++ if (!node) ++ { ++ fma_root_node *root_node; ++ ++ root_node = new fma_root_node (this, chain, this->m_next_forest_id++); ++ forest = root_node->get_forest (); ++ node = root_node; ++ ++ /* Until proved otherwise, assume this root is not part of an existing ++ forest and thus add its forest to the list of forests. 
*/ ++ this->m_fma_forests.push_back (forest); ++ } ++ else ++ forest = node->get_forest (); ++ ++ node->set_head (head); ++ ++ /* fma_node is part of a chain with several defs, one of them having already ++ been processed. The root of that already processed def is the canonical ++ one and the root of fma_node is added to its forest. No need to process ++ the children nodes as they were already processed when the other def was ++ processed. */ ++ if (ref_forest) ++ { ++ ref_forest->merge_forest (forest); ++ return; ++ } ++ ++ for (chain = head->first; chain; chain = chain->next_use) ++ { ++ fma_node *child_fma; ++ rtx fma_rtx, *accum_rtx_p; ++ ++ if (!is_fmul_fmac_insn (chain->insn, false)) ++ continue; ++ ++ /* Get FMA rtx. */ ++ fma_rtx = SET_SRC (PATTERN (chain->insn)); ++ /* FMA is negated. */ ++ if (GET_CODE (fma_rtx) == NEG) ++ fma_rtx = XEXP (fma_rtx, 0); ++ /* Get accumulator rtx. */ ++ accum_rtx_p = &XEXP (fma_rtx, 2); ++ /* Accumulator is negated. */ ++ if (!REG_P (*accum_rtx_p)) ++ accum_rtx_p = &XEXP (*accum_rtx_p, 0); ++ ++ /* This du_chain structure is not for the accumulator register. */ ++ if (accum_rtx_p != chain->loc) ++ continue; ++ ++ /* If object already created, this is a loop carried dependency. We ++ don't include this object in the children as we want trees for ++ rename_fma_trees to not be an infinite loop. */ ++ if (this->get_fma_node (chain->insn)) ++ continue; ++ ++ child_fma = new fma_node (node, chain); ++ ++ /* Memorize the mapping of this instruction to its fma_node object ++ as it will be processed for the chain starting at its destination ++ register later. */ ++ ++ /* Link to siblings. */ ++ node->add_child (child_fma); ++ } ++} ++ ++/* Perform a depth-first search of the forests of fma_node in ++ THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in ++ THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and ++ PROCESS_NODE () on each node. If FREE is true, free all std::list in the ++ same dfs. */ ++ ++void ++func_fma_steering::dfs (void (*process_forest) (fma_forest *), ++ void (*process_root) (fma_forest *, fma_root_node *), ++ void (*process_node) (fma_forest *, fma_node *), ++ bool free) ++{ ++ vec to_process; ++ std::list::iterator forest_iter; ++ ++ to_process.create (0); ++ ++ /* For each forest. */ ++ for (forest_iter = this->m_fma_forests.begin (); ++ forest_iter != this->m_fma_forests.end (); forest_iter++) ++ { ++ std::list::iterator root_iter; ++ ++ if (process_forest) ++ process_forest (*forest_iter); ++ ++ /* For each tree root in this forest. */ ++ for (root_iter = (*forest_iter)->get_roots ()->begin (); ++ root_iter != (*forest_iter)->get_roots ()->end (); root_iter++) ++ { ++ if (process_root) ++ process_root (*forest_iter, *root_iter); ++ to_process.safe_push (*root_iter); ++ } ++ ++ /* For each tree node in this forest. */ ++ while (!to_process.is_empty ()) ++ { ++ fma_node *node; ++ std::list::iterator child_iter; ++ ++ node = to_process.pop (); ++ ++ if (process_node) ++ process_node (*forest_iter, node); ++ ++ /* Absence of children might indicate an alternate root of a *chain*. ++ It's ok to skip it here as the chain will be renamed when ++ processing the canonical root for that chain. 
*/ ++ if (node->get_children ()->empty ()) ++ continue; ++ ++ for (child_iter = node->get_children ()->begin (); ++ child_iter != node->get_children ()->end (); child_iter++) ++ to_process.safe_push (*child_iter); ++ if (free) ++ { ++ if (node->root_p ()) ++ delete static_cast (node); ++ else ++ delete node; ++ } ++ } ++ if (free) ++ delete *forest_iter; ++ } ++ ++ to_process.release (); ++} ++ ++/* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */ ++ ++void ++func_fma_steering::analyze () ++{ ++ int i, n_blocks, *bb_dfs_preorder; ++ basic_block bb; ++ rtx_insn *insn; ++ ++ bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); ++ n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false); ++ ++ /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB ++ instructions. */ ++ for (i = 0; i < n_blocks; i++) ++ { ++ bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]); ++ FOR_BB_INSNS (bb, insn) ++ { ++ operand_rr_info *dest_op_info; ++ struct du_chain *chain; ++ unsigned dest_regno; ++ fma_forest *forest; ++ du_head_p head; ++ int i; ++ ++ if (!is_fmul_fmac_insn (insn, true)) ++ continue; ++ ++ /* Search the chain where this instruction is (one of) the root. */ ++ dest_op_info = insn_rr[INSN_UID (insn)].op_info; ++ dest_regno = REGNO (SET_DEST (PATTERN (insn))); ++ for (i = 0; i < dest_op_info->n_chains; i++) ++ { ++ /* The register tracked by this chain does not match the ++ destination register of insn. */ ++ if (dest_op_info->heads[i]->regno != dest_regno) ++ continue; ++ ++ head = dest_op_info->heads[i]; ++ /* The chain was merged in another, find the new head. */ ++ if (!head->first) ++ head = regrename_chain_from_id (head->id); ++ ++ /* Search the chain element for this instruction and, if another ++ FMUL or FMADD/FMSUB instruction was already processed, note ++ the forest of its tree. */ ++ forest = NULL; ++ for (chain = head->first; chain; chain = chain->next_use) ++ { ++ fma_node **fma_slot; ++ ++ if (!is_fmul_fmac_insn (chain->insn, true)) ++ continue; ++ ++ /* This is a use, continue. */ ++ if (chain->loc != &SET_DEST (PATTERN (chain->insn))) ++ continue; ++ ++ if (chain->insn == insn) ++ break; ++ ++ fma_slot = this->m_insn_fma_head_map->get (chain->insn); ++ if (fma_slot && (*fma_slot)->get_children ()) ++ forest = (*fma_slot)->get_forest (); ++ } ++ if (chain) ++ break; ++ } ++ ++ /* We didn't find a chain with a def for this instruction. */ ++ gcc_assert (i < dest_op_info->n_chains); ++ ++ this->analyze_fma_fmul_insn (forest, chain, head); ++ } ++ } ++ free (bb_dfs_preorder); ++ ++ if (dump_file) ++ this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info, ++ false); ++} ++ ++/* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with ++ the objective of keeping FPU pipeline balanced in term of instructions and ++ having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be ++ scheduled on the same pipeline. 
*/ ++ ++void ++func_fma_steering::rename_fma_trees () ++{ ++ this->dfs (dispatch_forest, NULL, rename_fma_node, true); ++ ++ if (dump_file && !this->m_fma_forests.empty ()) ++ { ++ fprintf (dump_file, "Function %s has ", current_function_name ()); ++ if (this->m_fpu_balance == 0) ++ fprintf (dump_file, "perfect balance of FMUL/FMA chains between the " ++ "two FPU pipelines\n"); ++ else if (this->m_fpu_balance > 0) ++ fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second " ++ "FPU pipeline\n", this->m_fpu_balance); ++ else /* this->m_fpu_balance < 0 */ ++ fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first " ++ "FPU pipeline\n", - this->m_fpu_balance); ++ } ++} ++ ++/* Execute FMA steering pass. */ ++ ++void ++func_fma_steering::execute_fma_steering () ++{ ++ df_set_flags (DF_LR_RUN_DCE); ++ df_note_add_problem (); ++ df_analyze (); ++ df_set_flags (DF_DEFER_INSN_RESCAN); ++ ++ regrename_init (true); ++ regrename_analyze (NULL); ++ this->analyze (); ++ this->rename_fma_trees (); ++ regrename_finish (); ++} ++ ++const pass_data pass_data_fma_steering = ++{ ++ RTL_PASS, /* type */ ++ "fma_steering", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_NONE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_fma_steering : public rtl_opt_pass ++{ ++public: ++ pass_fma_steering (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_fma_steering, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return (aarch64_tune_params.extra_tuning_flags ++ & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) ++ && optimize >= 2; ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ func_fma_steering *fma_steering = new func_fma_steering; ++ fma_steering->execute_fma_steering (); ++ delete fma_steering; ++ return 0; ++ } ++ ++}; // class pass_fma_steering ++ ++/* Create a new fma steering pass instance. */ ++ ++static rtl_opt_pass * ++make_pass_fma_steering (gcc::context *ctxt) ++{ ++ return new pass_fma_steering (ctxt); ++} ++ ++/* Register the FMA steering pass to the pass manager. */ ++ ++void ++aarch64_register_fma_steering () ++{ ++ opt_pass *pass_fma_steering = make_pass_fma_steering (g); ++ ++ static struct register_pass_info fma_steering_info ++ = { pass_fma_steering, "rnreg", 1, PASS_POS_INSERT_AFTER }; ++ ++ register_pass (&fma_steering_info); ++} +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/cortex-a57-fma-steering.h +@@ -0,0 +1,22 @@ ++/* This file contains declarations for the FMA steering optimization ++ pass for Cortex-A57. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
*/ ++ ++void aarch64_register_fma_steering (void); +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/driver-aarch64.c +@@ -0,0 +1,307 @@ ++/* Native CPU detection for aarch64. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#include "config.h" ++#include "system.h" ++ ++struct arch_extension ++{ ++ const char *ext; ++ const char *feat_string; ++}; ++ ++#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \ ++ { EXT_NAME, FEATURE_STRING }, ++static struct arch_extension ext_to_feat_string[] = ++{ ++#include "aarch64-option-extensions.def" ++}; ++#undef AARCH64_OPT_EXTENSION ++ ++ ++struct aarch64_core_data ++{ ++ const char* name; ++ const char* arch; ++ const char* implementer_id; ++ const char* part_no; ++}; ++ ++#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ ++ { CORE_NAME, #ARCH, IMP, PART }, ++ ++static struct aarch64_core_data cpu_data [] = ++{ ++#include "aarch64-cores.def" ++ { NULL, NULL, NULL, NULL } ++}; ++ ++#undef AARCH64_CORE ++ ++struct aarch64_arch ++{ ++ const char* id; ++ const char* name; ++}; ++ ++#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ ++ { #ARCH, NAME }, ++ ++static struct aarch64_arch aarch64_arches [] = ++{ ++#include "aarch64-arches.def" ++ {NULL, NULL} ++}; ++ ++#undef AARCH64_ARCH ++ ++/* Return the full architecture name string corresponding to the ++ identifier ID. */ ++ ++static const char* ++get_arch_name_from_id (const char* id) ++{ ++ unsigned int i = 0; ++ ++ for (i = 0; aarch64_arches[i].id != NULL; i++) ++ { ++ if (strcmp (id, aarch64_arches[i].id) == 0) ++ return aarch64_arches[i].name; ++ } ++ ++ return NULL; ++} ++ ++ ++/* Check wether the string CORE contains the same CPU part numbers ++ as BL_STRING. For example CORE="{0xd03, 0xd07}" and BL_STRING="0xd07.0xd03" ++ should return true. */ ++ ++static bool ++valid_bL_string_p (const char** core, const char* bL_string) ++{ ++ return strstr (bL_string, core[0]) != NULL ++ && strstr (bL_string, core[1]) != NULL; ++} ++ ++/* Return true iff ARR contains STR in one of its two elements. */ ++ ++static bool ++contains_string_p (const char** arr, const char* str) ++{ ++ bool res = false; ++ ++ if (arr[0] != NULL) ++ { ++ res = strstr (arr[0], str) != NULL; ++ if (res) ++ return res; ++ ++ if (arr[1] != NULL) ++ return strstr (arr[1], str) != NULL; ++ } ++ ++ return false; ++} ++ ++/* This will be called by the spec parser in gcc.c when it sees ++ a %:local_cpu_detect(args) construct. Currently it will be called ++ with either "arch", "cpu" or "tune" as argument depending on if ++ -march=native, -mcpu=native or -mtune=native is to be substituted. ++ ++ It returns a string containing new command line parameters to be ++ put at the place of the above two options, depending on what CPU ++ this is executed. E.g. "-march=armv8-a" on a Cortex-A57 for ++ -march=native. 
If the routine can't detect a known processor, ++ the -march or -mtune option is discarded. ++ ++ For -mtune and -mcpu arguments it attempts to detect the CPU or ++ a big.LITTLE system. ++ ARGC and ARGV are set depending on the actual arguments given ++ in the spec. */ ++ ++const char * ++host_detect_local_cpu (int argc, const char **argv) ++{ ++ const char *arch_id = NULL; ++ const char *res = NULL; ++ static const int num_exts = ARRAY_SIZE (ext_to_feat_string); ++ char buf[128]; ++ FILE *f = NULL; ++ bool arch = false; ++ bool tune = false; ++ bool cpu = false; ++ unsigned int i = 0; ++ unsigned int core_idx = 0; ++ const char* imps[2] = { NULL, NULL }; ++ const char* cores[2] = { NULL, NULL }; ++ unsigned int n_cores = 0; ++ unsigned int n_imps = 0; ++ bool processed_exts = false; ++ const char *ext_string = ""; ++ ++ gcc_assert (argc); ++ ++ if (!argv[0]) ++ goto not_found; ++ ++ /* Are we processing -march, mtune or mcpu? */ ++ arch = strcmp (argv[0], "arch") == 0; ++ if (!arch) ++ tune = strcmp (argv[0], "tune") == 0; ++ ++ if (!arch && !tune) ++ cpu = strcmp (argv[0], "cpu") == 0; ++ ++ if (!arch && !tune && !cpu) ++ goto not_found; ++ ++ f = fopen ("/proc/cpuinfo", "r"); ++ ++ if (f == NULL) ++ goto not_found; ++ ++ /* Look through /proc/cpuinfo to determine the implementer ++ and then the part number that identifies a particular core. */ ++ while (fgets (buf, sizeof (buf), f) != NULL) ++ { ++ if (strstr (buf, "implementer") != NULL) ++ { ++ for (i = 0; cpu_data[i].name != NULL; i++) ++ if (strstr (buf, cpu_data[i].implementer_id) != NULL ++ && !contains_string_p (imps, cpu_data[i].implementer_id)) ++ { ++ if (n_imps == 2) ++ goto not_found; ++ ++ imps[n_imps++] = cpu_data[i].implementer_id; ++ ++ break; ++ } ++ continue; ++ } ++ ++ if (strstr (buf, "part") != NULL) ++ { ++ for (i = 0; cpu_data[i].name != NULL; i++) ++ if (strstr (buf, cpu_data[i].part_no) != NULL ++ && !contains_string_p (cores, cpu_data[i].part_no)) ++ { ++ if (n_cores == 2) ++ goto not_found; ++ ++ cores[n_cores++] = cpu_data[i].part_no; ++ core_idx = i; ++ arch_id = cpu_data[i].arch; ++ break; ++ } ++ continue; ++ } ++ if (!tune && !processed_exts && strstr (buf, "Features") != NULL) ++ { ++ for (i = 0; i < num_exts; i++) ++ { ++ bool enabled = true; ++ char *p = NULL; ++ char *feat_string = concat (ext_to_feat_string[i].feat_string, NULL); ++ ++ p = strtok (feat_string, " "); ++ ++ while (p != NULL) ++ { ++ if (strstr (buf, p) == NULL) ++ { ++ enabled = false; ++ break; ++ } ++ p = strtok (NULL, " "); ++ } ++ ext_string = concat (ext_string, "+", enabled ? "" : "no", ++ ext_to_feat_string[i].ext, NULL); ++ } ++ processed_exts = true; ++ } ++ } ++ ++ fclose (f); ++ f = NULL; ++ ++ /* Weird cpuinfo format that we don't know how to handle. */ ++ if (n_cores == 0 || n_cores > 2 || n_imps != 1) ++ goto not_found; ++ ++ if (arch && !arch_id) ++ goto not_found; ++ ++ if (arch) ++ { ++ const char* arch_name = get_arch_name_from_id (arch_id); ++ ++ /* We got some arch indentifier that's not in aarch64-arches.def? */ ++ if (!arch_name) ++ goto not_found; ++ ++ res = concat ("-march=", arch_name, NULL); ++ } ++ /* We have big.LITTLE. */ ++ else if (n_cores == 2) ++ { ++ for (i = 0; cpu_data[i].name != NULL; i++) ++ { ++ if (strchr (cpu_data[i].part_no, '.') != NULL ++ && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0 ++ && valid_bL_string_p (cores, cpu_data[i].part_no)) ++ { ++ res = concat ("-m", cpu ? 
"cpu" : "tune", "=", cpu_data[i].name, NULL); ++ break; ++ } ++ } ++ if (!res) ++ goto not_found; ++ } ++ /* The simple, non-big.LITTLE case. */ ++ else ++ { ++ if (strncmp (cpu_data[core_idx].implementer_id, imps[0], ++ strlen (imps[0]) - 1) != 0) ++ goto not_found; ++ ++ res = concat ("-m", cpu ? "cpu" : "tune", "=", ++ cpu_data[core_idx].name, NULL); ++ } ++ ++ if (tune) ++ return res; ++ ++ res = concat (res, ext_string, NULL); ++ ++ return res; ++ ++not_found: ++ { ++ /* If detection fails we ignore the option. ++ Clean up and return empty string. */ ++ ++ if (f) ++ fclose (f); ++ ++ return ""; ++ } ++} ++ +--- a/src/gcc/config/aarch64/iterators.md ++++ b/src/gcc/config/aarch64/iterators.md +@@ -537,24 +537,15 @@ + + (define_mode_attr VRL2 [(V8QI "V32QI") (V4HI "V16HI") + (V2SI "V8SI") (V2SF "V8SF") +- (DI "V4DI") (DF "V4DF") +- (V16QI "V32QI") (V8HI "V16HI") +- (V4SI "V8SI") (V4SF "V8SF") +- (V2DI "V4DI") (V2DF "V4DF")]) ++ (DI "V4DI") (DF "V4DF")]) + + (define_mode_attr VRL3 [(V8QI "V48QI") (V4HI "V24HI") + (V2SI "V12SI") (V2SF "V12SF") +- (DI "V6DI") (DF "V6DF") +- (V16QI "V48QI") (V8HI "V24HI") +- (V4SI "V12SI") (V4SF "V12SF") +- (V2DI "V6DI") (V2DF "V6DF")]) ++ (DI "V6DI") (DF "V6DF")]) + + (define_mode_attr VRL4 [(V8QI "V64QI") (V4HI "V32HI") + (V2SI "V16SI") (V2SF "V16SF") +- (DI "V8DI") (DF "V8DF") +- (V16QI "V64QI") (V8HI "V32HI") +- (V4SI "V16SI") (V4SF "V16SF") +- (V2DI "V8DI") (V2DF "V8DF")]) ++ (DI "V8DI") (DF "V8DF")]) + + (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) + +--- a/src/gcc/config/aarch64/t-aarch64 ++++ b/src/gcc/config/aarch64/t-aarch64 +@@ -48,6 +48,16 @@ aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + ++cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \ ++ $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \ ++ dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \ ++ output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \ ++ $(CONTEXT_H) $(TREE_PASS_H) regrename.h \ ++ $(srcdir)/config/aarch64/cortex-a57-fma-steering.h \ ++ $(srcdir)/config/aarch64/aarch64-protos.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/aarch64/cortex-a57-fma-steering.c ++ + comma=, + MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) + MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) +--- a/src//dev/null ++++ b/src/gcc/config/aarch64/x-aarch64 +@@ -0,0 +1,3 @@ ++driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.c \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< +--- a/src/gcc/config/alpha/linux.h ++++ b/src/gcc/config/alpha/linux.h +@@ -61,10 +61,14 @@ along with GCC; see the file COPYING3. 
If not see + #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) + #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) + #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) + #else + #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) + #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) + #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (linux_libc == LIBC_MUSL) + #endif + + /* Determine what functions are present at the runtime; +--- a/src/gcc/config/arm/aarch-common-protos.h ++++ b/src/gcc/config/arm/aarch-common-protos.h +@@ -102,6 +102,8 @@ struct mem_cost_table + const int storef; /* SFmode. */ + const int stored; /* DFmode. */ + const int store_unaligned; /* Extra for unaligned stores. */ ++ const int loadv; /* Vector load. */ ++ const int storev; /* Vector store. */ + }; + + struct fp_cost_table +--- a/src/gcc/config/arm/aarch-cost-tables.h ++++ b/src/gcc/config/arm/aarch-cost-tables.h +@@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs = + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -130,12 +132,12 @@ const struct cpu_cost_table cortexa53_extra_costs = + 0, /* arith. */ + 0, /* logical. */ + COSTS_N_INSNS (1), /* shift. */ +- COSTS_N_INSNS (2), /* shift_reg. */ ++ 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ +- COSTS_N_INSNS (2), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ +- COSTS_N_INSNS (2), /* log_shift_reg. */ +- 0, /* extend. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ +@@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs = + 0, /* storef. */ + 0, /* stored. */ + 0, /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. 
*/ + }, + { + /* FP SFmode */ +--- a/src/gcc/config/arm/arm-arches.def ++++ b/src/gcc/config/arm/arm-arches.def +@@ -44,7 +44,8 @@ ARM_ARCH("armv6", arm1136js, 6, FL_CO_PROC | FL_FOR_ARCH6) + ARM_ARCH("armv6j", arm1136js, 6J, FL_CO_PROC | FL_FOR_ARCH6J) + ARM_ARCH("armv6k", mpcore, 6K, FL_CO_PROC | FL_FOR_ARCH6K) + ARM_ARCH("armv6z", arm1176jzs, 6Z, FL_CO_PROC | FL_FOR_ARCH6Z) +-ARM_ARCH("armv6zk", arm1176jzs, 6ZK, FL_CO_PROC | FL_FOR_ARCH6ZK) ++ARM_ARCH("armv6kz", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ) ++ARM_ARCH("armv6zk", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ) + ARM_ARCH("armv6t2", arm1156t2s, 6T2, FL_CO_PROC | FL_FOR_ARCH6T2) + ARM_ARCH("armv6-m", cortexm1, 6M, FL_FOR_ARCH6M) + ARM_ARCH("armv6s-m", cortexm1, 6M, FL_FOR_ARCH6M) +--- a/src/gcc/config/arm/arm-builtins.c ++++ b/src/gcc/config/arm/arm-builtins.c +@@ -89,7 +89,9 @@ enum arm_type_qualifiers + /* qualifier_const_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86, + /* Polynomial types. */ +- qualifier_poly = 0x100 ++ qualifier_poly = 0x100, ++ /* Lane indices - must be within range of previous argument = a vector. */ ++ qualifier_lane_index = 0x200 + }; + + /* The qualifier_internal allows generation of a unary builtin from +@@ -120,21 +122,40 @@ arm_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + + /* T (T, immediate). */ + static enum arm_type_qualifiers +-arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate }; ++#define BINOP_IMM_QUALIFIERS (arm_binop_imm_qualifiers) ++ ++/* T (T, lane index). */ ++static enum arm_type_qualifiers ++arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_lane_index }; + #define GETLANE_QUALIFIERS (arm_getlane_qualifiers) + + /* T (T, T, T, immediate). */ + static enum arm_type_qualifiers +-arm_lanemac_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++arm_mac_n_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_immediate }; +-#define LANEMAC_QUALIFIERS (arm_lanemac_qualifiers) ++#define MAC_N_QUALIFIERS (arm_mac_n_qualifiers) ++ ++/* T (T, T, T, lane index). */ ++static enum arm_type_qualifiers ++arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_none, ++ qualifier_none, qualifier_lane_index }; ++#define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers) + + /* T (T, T, immediate). */ + static enum arm_type_qualifiers +-arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; ++#define TERNOP_IMM_QUALIFIERS (arm_ternop_imm_qualifiers) ++ ++/* T (T, T, lane index). */ ++static enum arm_type_qualifiers ++arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index }; + #define SETLANE_QUALIFIERS (arm_setlane_qualifiers) + + /* T (T, T). 
*/ +@@ -525,12 +546,16 @@ enum arm_builtins + #undef CRYPTO2 + #undef CRYPTO3 + ++ ARM_BUILTIN_NEON_BASE, ++ ARM_BUILTIN_NEON_LANE_CHECK = ARM_BUILTIN_NEON_BASE, ++ + #include "arm_neon_builtins.def" + + ARM_BUILTIN_MAX + }; + +-#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) ++#define ARM_BUILTIN_NEON_PATTERN_START \ ++ (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) + + #undef CF + #undef VAR1 +@@ -889,7 +914,7 @@ arm_init_simd_builtin_scalar_types (void) + static void + arm_init_neon_builtins (void) + { +- unsigned int i, fcode = ARM_BUILTIN_NEON_BASE; ++ unsigned int i, fcode = ARM_BUILTIN_NEON_PATTERN_START; + + arm_init_simd_builtin_types (); + +@@ -899,6 +924,15 @@ arm_init_neon_builtins (void) + system. */ + arm_init_simd_builtin_scalar_types (); + ++ tree lane_check_fpr = build_function_type_list (void_type_node, ++ intSI_type_node, ++ intSI_type_node, ++ NULL); ++ arm_builtin_decls[ARM_BUILTIN_NEON_LANE_CHECK] = ++ add_builtin_function ("__builtin_arm_lane_check", lane_check_fpr, ++ ARM_BUILTIN_NEON_LANE_CHECK, BUILT_IN_MD, ++ NULL, NULL_TREE); ++ + for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++, fcode++) + { + bool print_type_signature_p = false; +@@ -1939,6 +1973,7 @@ arm_expand_unop_builtin (enum insn_code icode, + typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, ++ NEON_ARG_LANE_INDEX, + NEON_ARG_MEMORY, + NEON_ARG_STOP + } builtin_arg; +@@ -2055,6 +2090,16 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + ++ case NEON_ARG_LANE_INDEX: ++ /* Previous argument must be a vector, which this indexes. */ ++ gcc_assert (argc > 0); ++ if (CONST_INT_P (op[argc])) ++ { ++ enum machine_mode vmode = mode[argc - 1]; ++ neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp); ++ } ++ /* Fall through - if the lane index isn't a constant then ++ the next case will error. */ + case NEON_ARG_CONSTANT: + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) +@@ -2151,14 +2196,31 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, + return target; + } + +-/* Expand a Neon builtin. These are "special" because they don't have symbolic ++/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds. ++ Most of these are "special" because they don't have symbolic + constants defined per-instruction or per instruction-variant. Instead, the + required info is looked up in the table neon_builtin_data. */ + static rtx + arm_expand_neon_builtin (int fcode, tree exp, rtx target) + { ++ if (fcode == ARM_BUILTIN_NEON_LANE_CHECK) ++ { ++ /* Builtin is only to check bounds of the lane passed to some intrinsics ++ that are implemented with gcc vector extensions in arm_neon.h. */ ++ ++ tree nlanes = CALL_EXPR_ARG (exp, 0); ++ gcc_assert (TREE_CODE (nlanes) == INTEGER_CST); ++ rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1)); ++ if (CONST_INT_P (lane_idx)) ++ neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp); ++ else ++ error ("%Klane index must be a constant immediate", exp); ++ /* Don't generate any RTL. 
*/ ++ return const0_rtx; ++ } ++ + neon_builtin_datum *d = +- &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]; ++ &neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START]; + enum insn_code icode = d->code; + builtin_arg args[SIMD_MAX_BUILTIN_ARGS]; + int num_args = insn_data[d->code].n_operands; +@@ -2182,7 +2244,9 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) + int operands_k = k - is_void; + int expr_args_k = k - 1; + +- if (d->qualifiers[qualifiers_k] & qualifier_immediate) ++ if (d->qualifiers[qualifiers_k] & qualifier_lane_index) ++ args[k] = NEON_ARG_LANE_INDEX; ++ else if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = NEON_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) + { +--- a/src/gcc/config/arm/arm-c.c ++++ b/src/gcc/config/arm/arm-c.c +@@ -31,7 +31,11 @@ + #include "wide-int.h" + #include "inchash.h" + #include "tree.h" ++#include "tm_p.h" + #include "c-family/c-common.h" ++#include "target.h" ++#include "target-def.h" ++#include "c-family/c-pragma.h" + + /* Output C specific EABI object attributes. These can not be done in + arm.c because they require information from the C frontend. */ +@@ -51,3 +55,223 @@ arm_lang_object_attributes_init (void) + { + arm_lang_output_object_attributes_hook = arm_output_c_attributes; + } ++ ++#define builtin_define(TXT) cpp_define (pfile, TXT) ++#define builtin_assert(TXT) cpp_assert (pfile, TXT) ++ ++/* Define or undefine macros based on the current target. If the user does ++ #pragma GCC target, we need to adjust the macros dynamically. */ ++ ++static void ++def_or_undef_macro(struct cpp_reader* pfile, const char *name, bool def_p) ++{ ++ if (def_p) ++ cpp_define (pfile, name); ++ else ++ cpp_undef (pfile, name); ++} ++ ++static void ++arm_cpu_builtins (struct cpp_reader* pfile) ++{ ++ def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY); ++ def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT); ++ def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT); ++ if (TARGET_CRYPTO) ++ builtin_define ("__ARM_FEATURE_CRYPTO"); ++ if (unaligned_access) ++ builtin_define ("__ARM_FEATURE_UNALIGNED"); ++ if (TARGET_CRC32) ++ builtin_define ("__ARM_FEATURE_CRC32"); ++ ++ def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT); ++ ++ if (TARGET_ARM_FEATURE_LDREX) ++ builtin_define_with_int_value ("__ARM_FEATURE_LDREX", ++ TARGET_ARM_FEATURE_LDREX); ++ else ++ cpp_undef (pfile, "__ARM_FEATURE_LDREX"); ++ ++ def_or_undef_macro (pfile, "__ARM_FEATURE_CLZ", ++ ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) ++ || TARGET_ARM_ARCH_ISA_THUMB >=2)); ++ ++ def_or_undef_macro (pfile, "__ARM_FEATURE_SIMD32", TARGET_INT_SIMD); ++ ++ builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", ++ flag_short_enums ? 1 : 4); ++ builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", wchar_type_node); ++ if (TARGET_ARM_ARCH_PROFILE) ++ builtin_define_with_int_value ("__ARM_ARCH_PROFILE", ++ TARGET_ARM_ARCH_PROFILE); ++ ++ /* Define __arm__ even when in thumb mode, for ++ consistency with armcc. 
*/ ++ builtin_define ("__arm__"); ++ if (TARGET_ARM_ARCH) ++ builtin_define_with_int_value ("__ARM_ARCH", TARGET_ARM_ARCH); ++ if (arm_arch_notm) ++ builtin_define ("__ARM_ARCH_ISA_ARM"); ++ builtin_define ("__APCS_32__"); ++ ++ def_or_undef_macro (pfile, "__thumb__", TARGET_THUMB); ++ def_or_undef_macro (pfile, "__thumb2__", TARGET_THUMB2); ++ if (TARGET_BIG_END) ++ def_or_undef_macro (pfile, "__THUMBEB__", TARGET_THUMB); ++ else ++ def_or_undef_macro (pfile, "__THUMBEL__", TARGET_THUMB); ++ ++ if (TARGET_ARM_ARCH_ISA_THUMB) ++ builtin_define_with_int_value ("__ARM_ARCH_ISA_THUMB", ++ TARGET_ARM_ARCH_ISA_THUMB); ++ ++ if (TARGET_BIG_END) ++ { ++ builtin_define ("__ARMEB__"); ++ builtin_define ("__ARM_BIG_ENDIAN"); ++ } ++ else ++ { ++ builtin_define ("__ARMEL__"); ++ } ++ ++ if (TARGET_SOFT_FLOAT) ++ builtin_define ("__SOFTFP__"); ++ ++ if (TARGET_VFP) ++ builtin_define ("__VFP_FP__"); ++ ++ if (TARGET_ARM_FP) ++ builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP); ++ if (arm_fp16_format == ARM_FP16_FORMAT_IEEE) ++ builtin_define ("__ARM_FP16_FORMAT_IEEE"); ++ if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) ++ builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); ++ if (TARGET_FMA) ++ builtin_define ("__ARM_FEATURE_FMA"); ++ ++ if (TARGET_NEON) ++ { ++ builtin_define ("__ARM_NEON__"); ++ builtin_define ("__ARM_NEON"); ++ } ++ if (TARGET_NEON_FP) ++ builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP); ++ ++ /* Add a define for interworking. Needed when building libgcc.a. */ ++ if (arm_cpp_interwork) ++ builtin_define ("__THUMB_INTERWORK__"); ++ ++ ++ builtin_define (arm_arch_name); ++ if (arm_arch_xscale) ++ builtin_define ("__XSCALE__"); ++ if (arm_arch_iwmmxt) ++ { ++ builtin_define ("__IWMMXT__"); ++ builtin_define ("__ARM_WMMX"); ++ } ++ if (arm_arch_iwmmxt2) ++ builtin_define ("__IWMMXT2__"); ++ /* ARMv6KZ was originally identified as the misspelled __ARM_ARCH_6ZK__. To ++ preserve the existing behaviour, the misspelled feature macro must still be ++ defined. */ ++ if (arm_arch6kz) ++ builtin_define ("__ARM_ARCH_6ZK__"); ++ if (TARGET_AAPCS_BASED) ++ { ++ if (arm_pcs_default == ARM_PCS_AAPCS_VFP) ++ builtin_define ("__ARM_PCS_VFP"); ++ else if (arm_pcs_default == ARM_PCS_AAPCS) ++ builtin_define ("__ARM_PCS"); ++ builtin_define ("__ARM_EABI__"); ++ } ++ ++ def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV); ++ def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV); ++ ++ def_or_undef_macro (pfile, "__ARM_ASM_SYNTAX_UNIFIED__", inline_asm_unified); ++} ++ ++void ++arm_cpu_cpp_builtins (struct cpp_reader * pfile) ++{ ++ builtin_assert ("cpu=arm"); ++ builtin_assert ("machine=arm"); ++ ++ arm_cpu_builtins (pfile); ++} ++ ++/* Hook to validate the current #pragma GCC target and set the arch custom ++ mode state. If ARGS is NULL, then POP_TARGET is used to reset ++ the options. */ ++static bool ++arm_pragma_target_parse (tree args, tree pop_target) ++{ ++ tree prev_tree = build_target_option_node (&global_options); ++ tree cur_tree; ++ struct cl_target_option *prev_opt; ++ struct cl_target_option *cur_opt; ++ ++ if (! args) ++ { ++ cur_tree = ((pop_target) ? 
pop_target : target_option_default_node); ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (cur_tree)); ++ } ++ else ++ { ++ cur_tree = arm_valid_target_attribute_tree (args, &global_options, ++ &global_options_set); ++ if (cur_tree == NULL_TREE) ++ { ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (prev_tree)); ++ return false; ++ } ++ } ++ ++ target_option_current_node = cur_tree; ++ arm_reset_previous_fndecl (); ++ ++ /* Figure out the previous mode. */ ++ prev_opt = TREE_TARGET_OPTION (prev_tree); ++ cur_opt = TREE_TARGET_OPTION (cur_tree); ++ ++ gcc_assert (prev_opt); ++ gcc_assert (cur_opt); ++ ++ if (cur_opt->x_target_flags != prev_opt->x_target_flags) ++ { ++ /* For the definitions, ensure all newly defined macros are considered ++ as used for -Wunused-macros. There is no point warning about the ++ compiler predefined macros. */ ++ cpp_options *cpp_opts = cpp_get_options (parse_in); ++ unsigned char saved_warn_unused_macros = cpp_opts->warn_unused_macros; ++ cpp_opts->warn_unused_macros = 0; ++ ++ /* Update macros. */ ++ gcc_assert (cur_opt->x_target_flags == target_flags); ++ arm_cpu_builtins (parse_in); ++ ++ cpp_opts->warn_unused_macros = saved_warn_unused_macros; ++ } ++ ++ return true; ++} ++ ++/* Register target pragmas. We need to add the hook for parsing #pragma GCC ++ option here rather than in arm.c since it will pull in various preprocessor ++ functions, and those are not present in languages like fortran without a ++ preprocessor. */ ++ ++void ++arm_register_target_pragmas (void) ++{ ++ /* Update pragma hook to allow parsing #pragma GCC target. */ ++ targetm.target_option.pragma_parse = arm_pragma_target_parse; ++ ++#ifdef REGISTER_SUBTARGET_PRAGMAS ++ REGISTER_SUBTARGET_PRAGMAS (); ++#endif ++} +--- a/src/gcc/config/arm/arm-cores.def ++++ b/src/gcc/config/arm/arm-cores.def +@@ -125,8 +125,8 @@ ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, FL_LDSCHED, 9e) + /* V6 Architecture Processors */ + ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, FL_LDSCHED, 9e) + ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6ZK, FL_LDSCHED, 9e) +-ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) ++ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, FL_LDSCHED, 9e) ++ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, FL_LDSCHED, 9e) + ARM_CORE("mpcore", mpcore, mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, FL_LDSCHED, v6t2) +@@ -158,7 +158,7 @@ ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex + ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7) + ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) + ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) +-ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) ++ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4) + + /* V7 big.LITTLE implementations */ + ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) +--- a/src/gcc/config/arm/arm-protos.h ++++ b/src/gcc/config/arm/arm-protos.h +@@ -30,6 +30,7 @@ extern void arm_load_pic_register (unsigned long); + extern int arm_volatile_func (void); + extern void arm_expand_prologue (void); + extern void 
arm_expand_epilogue (bool); ++extern void arm_declare_function_name (FILE *, const char *, tree); + extern void thumb2_expand_return (bool); + extern const char *arm_strip_name_encoding (const char *); + extern void arm_asm_output_labelref (FILE *, const char *); +@@ -66,10 +67,6 @@ extern rtx legitimize_tls_address (rtx, rtx); + extern bool arm_legitimate_address_p (machine_mode, rtx, bool); + extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int); + extern int thumb_legitimate_offset_p (machine_mode, HOST_WIDE_INT); +-extern bool arm_legitimize_reload_address (rtx *, machine_mode, int, int, +- int); +-extern rtx thumb_legitimize_reload_address (rtx *, machine_mode, int, int, +- int); + extern int thumb1_legitimate_address_p (machine_mode, rtx, int); + extern bool ldm_stm_operation_p (rtx, bool, machine_mode mode, + bool, bool); +@@ -89,7 +86,7 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode, + extern rtx neon_make_constant (rtx); + extern tree arm_builtin_vectorized_function (tree, tree, tree); + extern void neon_expand_vector_init (rtx, rtx); +-extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); ++extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); + extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern HOST_WIDE_INT neon_element_bits (machine_mode); + extern void neon_reinterpret (rtx, rtx); +@@ -185,9 +182,6 @@ extern const char *thumb1_unexpanded_epilogue (void); + extern void thumb1_expand_prologue (void); + extern void thumb1_expand_epilogue (void); + extern const char *thumb1_output_interwork (void); +-#ifdef TREE_CODE +-extern int is_called_in_ARM_mode (tree); +-#endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE + extern enum arm_cond_code maybe_get_arm_condition_code (rtx); +@@ -216,14 +210,15 @@ extern int arm_dllexport_p (tree); + extern int arm_dllimport_p (tree); + extern void arm_mark_dllexport (tree); + extern void arm_mark_dllimport (tree); ++extern bool arm_change_mode_p (tree); + #endif + ++extern tree arm_valid_target_attribute_tree (tree, struct gcc_options *, ++ struct gcc_options *); + extern void arm_pr_long_calls (struct cpp_reader *); + extern void arm_pr_no_long_calls (struct cpp_reader *); + extern void arm_pr_long_calls_off (struct cpp_reader *); + +-extern void arm_lang_object_attributes_init(void); +- + extern const char *arm_mangle_type (const_tree); + extern const char *arm_mangle_builtin_type (const_tree); + +@@ -257,13 +252,6 @@ struct cpu_vec_costs { + + struct cpu_cost_table; + +-enum arm_sched_autopref +- { +- ARM_SCHED_AUTOPREF_OFF, +- ARM_SCHED_AUTOPREF_RANK, +- ARM_SCHED_AUTOPREF_FULL +- }; +- + /* Dump function ARM_PRINT_TUNE_INFO should be updated whenever this + structure is modified. */ + +@@ -272,39 +260,58 @@ struct tune_params + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + const struct cpu_cost_table *insn_extra_cost; + bool (*sched_adjust_cost) (rtx_insn *, rtx, rtx_insn *, int *); ++ int (*branch_cost) (bool, bool); ++ /* Vectorizer costs. */ ++ const struct cpu_vec_costs* vec_costs; + int constant_limit; + /* Maximum number of instructions to conditionalise. */ + int max_insns_skipped; +- int num_prefetch_slots; +- int l1_cache_size; +- int l1_cache_line_size; +- bool prefer_constant_pool; +- int (*branch_cost) (bool, bool); ++ /* Maximum number of instructions to inline calls to memset. */ ++ int max_insns_inline_memset; ++ /* Issue rate of the processor. 
*/ ++ unsigned int issue_rate; ++ /* Explicit prefetch data. */ ++ struct ++ { ++ int num_slots; ++ int l1_cache_size; ++ int l1_cache_line_size; ++ } prefetch; ++ enum {PREF_CONST_POOL_FALSE, PREF_CONST_POOL_TRUE} ++ prefer_constant_pool: 1; + /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ +- bool prefer_ldrd_strd; ++ enum {PREF_LDRD_FALSE, PREF_LDRD_TRUE} prefer_ldrd_strd: 1; + /* The preference for non short cirtcuit operation when optimizing for + performance. The first element covers Thumb state and the second one + is for ARM state. */ +- bool logical_op_non_short_circuit[2]; +- /* Vectorizer costs. */ +- const struct cpu_vec_costs* vec_costs; +- /* Prefer Neon for 64-bit bitops. */ +- bool prefer_neon_for_64bits; ++ enum log_op_non_short_circuit {LOG_OP_NON_SHORT_CIRCUIT_FALSE, ++ LOG_OP_NON_SHORT_CIRCUIT_TRUE}; ++ log_op_non_short_circuit logical_op_non_short_circuit_thumb: 1; ++ log_op_non_short_circuit logical_op_non_short_circuit_arm: 1; + /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ +- bool disparage_flag_setting_t16_encodings; +- /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags +- would be set. */ +- bool disparage_partial_flag_setting_t16_encodings; ++ enum {DISPARAGE_FLAGS_NEITHER, DISPARAGE_FLAGS_PARTIAL, DISPARAGE_FLAGS_ALL} ++ disparage_flag_setting_t16_encodings: 2; ++ enum {PREF_NEON_64_FALSE, PREF_NEON_64_TRUE} prefer_neon_for_64bits: 1; + /* Prefer to inline string operations like memset by using Neon. */ +- bool string_ops_prefer_neon; +- /* Maximum number of instructions to inline calls to memset. */ +- int max_insns_inline_memset; +- /* Bitfield encoding the fuseable pairs of instructions. */ +- unsigned int fuseable_ops; ++ enum {PREF_NEON_STRINGOPS_FALSE, PREF_NEON_STRINGOPS_TRUE} ++ string_ops_prefer_neon: 1; ++ /* Bitfield encoding the fusible pairs of instructions. Use FUSE_OPS ++ in an initializer if multiple fusion operations are supported on a ++ target. */ ++ enum fuse_ops ++ { ++ FUSE_NOTHING = 0, ++ FUSE_MOVW_MOVT = 1 << 0 ++ } fusible_ops: 1; + /* Depth of scheduling queue to check for L2 autoprefetcher. */ +- enum arm_sched_autopref sched_autopref; ++ enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL} ++ sched_autopref: 2; + }; + ++/* Smash multiple fusion operations into a type that can be used for an ++ initializer. */ ++#define FUSE_OPS(x) ((tune_params::fuse_ops) (x)) ++ + extern const struct tune_params *current_tune; + extern int vfp3_const_double_for_fract_bits (rtx); + /* return power of two from operand, otherwise 0. */ +@@ -324,9 +331,16 @@ extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes); + + extern void arm_emit_eabi_attribute (const char *, int, int); + ++extern void arm_reset_previous_fndecl (void); ++ + /* Defined in gcc/common/config/arm-common.c. */ + extern const char *arm_rewrite_selected_cpu (const char *name); + ++/* Defined in gcc/common/config/arm-c.c. */ ++extern void arm_lang_object_attributes_init (void); ++extern void arm_register_target_pragmas (void); ++extern void arm_cpu_cpp_builtins (struct cpp_reader *); ++ + extern bool arm_is_constant_pool_ref (rtx); + + /* Flags used to identify the presence of processor capabilities. */ +@@ -368,6 +382,7 @@ extern bool arm_is_constant_pool_ref (rtx); + + #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ + #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */ ++#define FL_ARCH6KZ (1 << 31) /* ARMv6KZ architecture. 
*/ + + /* Flags that only effect tuning, not available instructions. */ + #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ +@@ -387,7 +402,7 @@ extern bool arm_is_constant_pool_ref (rtx); + #define FL_FOR_ARCH6J FL_FOR_ARCH6 + #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) + #define FL_FOR_ARCH6Z FL_FOR_ARCH6 +-#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K ++#define FL_FOR_ARCH6KZ (FL_FOR_ARCH6K | FL_ARCH6KZ) + #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) + #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) + #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) +@@ -427,6 +442,9 @@ extern int arm_arch6; + /* Nonzero if this chip supports the ARM 6K extensions. */ + extern int arm_arch6k; + ++/* Nonzero if this chip supports the ARM 6KZ extensions. */ ++extern int arm_arch6kz; ++ + /* Nonzero if instructions present in ARMv6-M can be used. */ + extern int arm_arch6m; + +@@ -467,12 +485,6 @@ extern int arm_tune_wbuf; + /* Nonzero if tuning for Cortex-A9. */ + extern int arm_tune_cortex_a9; + +-/* Nonzero if generating Thumb instructions. */ +-extern int thumb_code; +- +-/* Nonzero if generating Thumb-1 instructions. */ +-extern int thumb1_code; +- + /* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around +--- a/src/gcc/config/arm/arm-tables.opt ++++ b/src/gcc/config/arm/arm-tables.opt +@@ -371,46 +371,49 @@ EnumValue + Enum(arm_arch) String(armv6z) Value(13) + + EnumValue +-Enum(arm_arch) String(armv6zk) Value(14) ++Enum(arm_arch) String(armv6kz) Value(14) + + EnumValue +-Enum(arm_arch) String(armv6t2) Value(15) ++Enum(arm_arch) String(armv6zk) Value(15) + + EnumValue +-Enum(arm_arch) String(armv6-m) Value(16) ++Enum(arm_arch) String(armv6t2) Value(16) + + EnumValue +-Enum(arm_arch) String(armv6s-m) Value(17) ++Enum(arm_arch) String(armv6-m) Value(17) + + EnumValue +-Enum(arm_arch) String(armv7) Value(18) ++Enum(arm_arch) String(armv6s-m) Value(18) + + EnumValue +-Enum(arm_arch) String(armv7-a) Value(19) ++Enum(arm_arch) String(armv7) Value(19) + + EnumValue +-Enum(arm_arch) String(armv7ve) Value(20) ++Enum(arm_arch) String(armv7-a) Value(20) + + EnumValue +-Enum(arm_arch) String(armv7-r) Value(21) ++Enum(arm_arch) String(armv7ve) Value(21) + + EnumValue +-Enum(arm_arch) String(armv7-m) Value(22) ++Enum(arm_arch) String(armv7-r) Value(22) + + EnumValue +-Enum(arm_arch) String(armv7e-m) Value(23) ++Enum(arm_arch) String(armv7-m) Value(23) + + EnumValue +-Enum(arm_arch) String(armv8-a) Value(24) ++Enum(arm_arch) String(armv7e-m) Value(24) + + EnumValue +-Enum(arm_arch) String(armv8-a+crc) Value(25) ++Enum(arm_arch) String(armv8-a) Value(25) + + EnumValue +-Enum(arm_arch) String(iwmmxt) Value(26) ++Enum(arm_arch) String(armv8-a+crc) Value(26) + + EnumValue +-Enum(arm_arch) String(iwmmxt2) Value(27) ++Enum(arm_arch) String(iwmmxt) Value(27) ++ ++EnumValue ++Enum(arm_arch) String(iwmmxt2) Value(28) + + Enum + Name(arm_fpu) Type(int) +--- a/src/gcc/config/arm/arm.c ++++ b/src/gcc/config/arm/arm.c +@@ -94,10 +94,12 @@ + #include "opts.h" + #include "dumpfile.h" + #include "gimple-expr.h" ++#include "target-globals.h" + #include "builtins.h" + #include "tm-constrs.h" + #include "rtl-iter.h" + #include "sched-int.h" ++#include "tree.h" + + /* Forward definitions of types. 
*/ + typedef struct minipool_node Mnode; +@@ -121,6 +123,7 @@ static int arm_gen_constant (enum rtx_code, machine_mode, rtx, + static unsigned bit_count (unsigned long); + static int arm_address_register_rtx_p (rtx, int); + static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int); ++static bool is_called_in_ARM_mode (tree); + static int thumb2_legitimate_index_p (machine_mode, rtx, int); + static int thumb1_base_register_rtx_p (rtx, machine_mode, int); + static rtx arm_legitimize_address (rtx, rtx, machine_mode); +@@ -231,6 +234,7 @@ static void arm_encode_section_info (tree, rtx, int); + + static void arm_file_end (void); + static void arm_file_start (void); ++static void arm_insert_attributes (tree, tree *); + + static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode, + tree, int *, int); +@@ -264,6 +268,10 @@ static tree arm_build_builtin_va_list (void); + static void arm_expand_builtin_va_start (tree, rtx); + static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); + static void arm_option_override (void); ++static void arm_option_print (FILE *, int, struct cl_target_option *); ++static void arm_set_current_function (tree); ++static bool arm_can_inline_p (tree, tree); ++static bool arm_valid_target_attribute_p (tree, tree, tree, int); + static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode); + static bool arm_macro_fusion_p (void); + static bool arm_cannot_copy_insn_p (rtx_insn *); +@@ -386,6 +394,9 @@ static const struct attribute_spec arm_attribute_table[] = + #undef TARGET_ATTRIBUTE_TABLE + #define TARGET_ATTRIBUTE_TABLE arm_attribute_table + ++#undef TARGET_INSERT_ATTRIBUTES ++#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes ++ + #undef TARGET_ASM_FILE_START + #define TARGET_ASM_FILE_START arm_file_start + #undef TARGET_ASM_FILE_END +@@ -412,9 +423,15 @@ static const struct attribute_spec arm_attribute_table[] = + #undef TARGET_ASM_FUNCTION_EPILOGUE + #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue + ++#undef TARGET_CAN_INLINE_P ++#define TARGET_CAN_INLINE_P arm_can_inline_p ++ + #undef TARGET_OPTION_OVERRIDE + #define TARGET_OPTION_OVERRIDE arm_option_override + ++#undef TARGET_OPTION_PRINT ++#define TARGET_OPTION_PRINT arm_option_print ++ + #undef TARGET_COMP_TYPE_ATTRIBUTES + #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes + +@@ -430,6 +447,12 @@ static const struct attribute_spec arm_attribute_table[] = + #undef TARGET_SCHED_ADJUST_COST + #define TARGET_SCHED_ADJUST_COST arm_adjust_cost + ++#undef TARGET_SET_CURRENT_FUNCTION ++#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function ++ ++#undef TARGET_OPTION_VALID_ATTRIBUTE_P ++#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p ++ + #undef TARGET_SCHED_REORDER + #define TARGET_SCHED_REORDER arm_sched_reorder + +@@ -806,6 +829,9 @@ int arm_arch6 = 0; + /* Nonzero if this chip supports the ARM 6K extensions. */ + int arm_arch6k = 0; + ++/* Nonzero if this chip supports the ARM 6KZ extensions. */ ++int arm_arch6kz = 0; ++ + /* Nonzero if instructions present in ARMv6-M can be used. */ + int arm_arch6m = 0; + +@@ -846,12 +872,6 @@ int arm_tune_wbuf = 0; + /* Nonzero if tuning for Cortex-A9. */ + int arm_tune_cortex_a9 = 0; + +-/* Nonzero if generating Thumb instructions. */ +-int thumb_code = 0; +- +-/* Nonzero if generating Thumb-1 instructions. */ +-int thumb1_code = 0; +- + /* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. 
+ XXX This is a bit of a hack, it's intended to help work around +@@ -940,11 +960,13 @@ struct processors + }; + + +-#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 +-#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ +- prefetch_slots, \ +- l1_size, \ +- l1_line_size ++#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 } ++#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \ ++ { \ ++ num_slots, \ ++ l1_size, \ ++ l1_line_size \ ++ } + + /* arm generic vectorizer costs. */ + static const +@@ -1027,7 +1049,9 @@ const struct cpu_cost_table cortexa9_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* storef. */ + COSTS_N_INSNS (1), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1128,7 +1152,9 @@ const struct cpu_cost_table cortexa8_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* storef. */ + COSTS_N_INSNS (1), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1230,7 +1256,9 @@ const struct cpu_cost_table cortexa5_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1333,7 +1361,9 @@ const struct cpu_cost_table cortexa7_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1434,7 +1464,9 @@ const struct cpu_cost_table cortexa12_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ +- 0 /* store_unaligned. */ ++ 0, /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1535,7 +1567,9 @@ const struct cpu_cost_table cortexa15_extra_costs = + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ +- 0 /* store_unaligned. */ ++ 0, /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1636,7 +1670,9 @@ const struct cpu_cost_table v7m_extra_costs = + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ +- COSTS_N_INSNS (1) /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (1), /* loadv. */ ++ COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ +@@ -1678,49 +1714,50 @@ const struct cpu_cost_table v7m_extra_costs = + } + }; + +-#define ARM_FUSE_NOTHING (0) +-#define ARM_FUSE_MOVW_MOVT (1 << 0) +- + const struct tune_params arm_slowmul_tune = + { + arm_slowmul_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 3, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. 
*/ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_fastmul_tune = + { + arm_fastmul_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* StrongARM has early execution of branches, so a sequence that is worth +@@ -1729,233 +1766,279 @@ const struct tune_params arm_fastmul_tune = + const struct tune_params arm_strongarm_tune = + { + arm_fastmul_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 3, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. 
*/ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_xscale_tune = + { + arm_xscale_rtx_costs, +- NULL, ++ NULL, /* Insn extra costs. */ + xscale_sched_adjust_cost, ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 2, /* Constant limit. */ + 3, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_9e_tune = + { + arm_9e_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF ++}; ++ ++const struct tune_params arm_marvell_pj4_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ + arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ &arm_default_vec_cost, ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_v6t2_tune = + { + arm_9e_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. 
*/ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + ++ + /* Generic Cortex tuning. Use more specific tunings if appropriate. */ + const struct tune_params arm_cortex_tune = + { + arm_9e_rtx_costs, + &generic_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a8_tune = + { + arm_9e_rtx_costs, + &cortexa8_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. 
*/ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a7_tune = + { + arm_9e_rtx_costs, + &cortexa7_extra_costs, +- NULL, ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a15_tune = + { + arm_9e_rtx_costs, + &cortexa15_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 3, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_FULL + }; + + const struct tune_params arm_cortex_a53_tune = + { + arm_9e_rtx_costs, + &cortexa53_extra_costs, +- NULL, /* Scheduler cost adjustment. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. 
*/ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a57_tune = + { + arm_9e_rtx_costs, + &cortexa57_extra_costs, +- NULL, /* Scheduler cost adjustment. */ +- 1, /* Constant limit. */ +- 2, /* Max cond insns. */ +- ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ ++ NULL, /* Sched adj cost. */ + arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ ++ &arm_default_vec_cost, ++ 1, /* Constant limit. */ ++ 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 3, /* Issue rate. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), ++ tune_params::SCHED_AUTOPREF_FULL + }; + + const struct tune_params arm_xgene1_tune = + { + arm_9e_rtx_costs, + &xgene1_extra_costs, +- NULL, /* Scheduler cost adjustment. */ +- 1, /* Constant limit. */ +- 2, /* Max cond insns. */ +- ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ ++ NULL, /* Sched adj cost. */ + arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 32, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ &arm_default_vec_cost, ++ 1, /* Constant limit. */ ++ 2, /* Max cond insns. */ ++ 32, /* Memset max inline. */ ++ 4, /* Issue rate. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* Branches can be dual-issued on Cortex-A5, so conditional execution is +@@ -1965,21 +2048,23 @@ const struct tune_params arm_cortex_a5_tune = + { + arm_9e_rtx_costs, + &cortexa5_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_cortex_a5_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 1, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_cortex_a5_branch_cost, +- false, /* Prefer LDRD/STRD. 
*/ +- {false, false}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a9_tune = +@@ -1987,41 +2072,45 @@ const struct tune_params arm_cortex_a9_tune = + arm_9e_rtx_costs, + &cortexa9_extra_costs, + cortex_a9_sched_adjust_cost, ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_BENEFICIAL(4,32,32), +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_cortex_a12_tune = + { + arm_9e_rtx_costs, + &cortexa12_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, /* Vectorizer costs. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- true, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- true, true, /* Prefer 32-bit encodings. */ +- true, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_TRUE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_ALL, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_TRUE, ++ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single +@@ -2035,21 +2124,23 @@ const struct tune_params arm_v7m_tune = + { + arm_9e_rtx_costs, + &v7m_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. 
*/ ++ arm_cortex_m_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 2, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_cortex_m_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {false, false}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* Cortex-M7 tuning. */ +@@ -2058,21 +2149,23 @@ const struct tune_params arm_cortex_m7_tune = + { + arm_9e_rtx_costs, + &v7m_extra_costs, +- NULL, /* Sched adj cost. */ ++ NULL, /* Sched adj cost. */ ++ arm_cortex_m7_branch_cost, ++ &arm_default_vec_cost, + 0, /* Constant limit. */ + 1, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_cortex_m7_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than +@@ -2080,43 +2173,47 @@ const struct tune_params arm_cortex_m7_tune = + const struct tune_params arm_v6m_tune = + { + arm_9e_rtx_costs, +- NULL, +- NULL, /* Sched adj cost. */ ++ NULL, /* Insn extra costs. */ ++ NULL, /* Sched adj cost. */ ++ arm_default_branch_cost, ++ &arm_default_vec_cost, /* Vectorizer costs. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 1, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- false, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {false, false}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_FALSE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. 
*/ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, +- NULL, ++ NULL, /* Insn extra costs. */ + fa726te_sched_adjust_cost, ++ arm_default_branch_cost, ++ &arm_default_vec_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ ++ 8, /* Memset max inline. */ ++ 2, /* Issue rate. */ + ARM_PREFETCH_NOT_BENEFICIAL, +- true, /* Prefer constant pool. */ +- arm_default_branch_cost, +- false, /* Prefer LDRD/STRD. */ +- {true, true}, /* Prefer non short circuit. */ +- &arm_default_vec_cost, /* Vectorizer costs. */ +- false, /* Prefer Neon for 64-bits bitops. */ +- false, false, /* Prefer 32-bit encodings. */ +- false, /* Prefer Neon for stringops. */ +- 8, /* Maximum insns to inline memset. */ +- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ +- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ ++ tune_params::PREF_CONST_POOL_TRUE, ++ tune_params::PREF_LDRD_FALSE, ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ ++ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ ++ tune_params::DISPARAGE_FLAGS_NEITHER, ++ tune_params::PREF_NEON_64_FALSE, ++ tune_params::PREF_NEON_STRINGOPS_FALSE, ++ tune_params::FUSE_NOTHING, ++ tune_params::SCHED_AUTOPREF_OFF + }; + + +@@ -2626,6 +2723,171 @@ arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + } + ++/* Check any incompatible options that the user has specified. */ ++static void ++arm_option_check_internal (struct gcc_options *opts) ++{ ++ int flags = opts->x_target_flags; ++ ++ /* Make sure that the processor choice does not conflict with any of the ++ other command line choices. */ ++ if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM)) ++ error ("target CPU does not support ARM mode"); ++ ++ /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done ++ from here where no function is being compiled currently. */ ++ if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags)) ++ warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); ++ ++ if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING) ++ warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); ++ ++ /* If this target is normally configured to use APCS frames, warn if they ++ are turned off and debugging is turned on. */ ++ if (TARGET_ARM_P (flags) ++ && write_symbols != NO_DEBUG ++ && !TARGET_APCS_FRAME ++ && (TARGET_DEFAULT & MASK_APCS_FRAME)) ++ warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); ++ ++ /* iWMMXt unsupported under Thumb mode. */ ++ if (TARGET_THUMB_P (flags) && TARGET_IWMMXT) ++ error ("iWMMXt unsupported under Thumb mode"); ++ ++ if (TARGET_HARD_TP && TARGET_THUMB1_P (flags)) ++ error ("can not use -mtp=cp15 with 16-bit Thumb"); ++ ++ if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic) ++ { ++ error ("RTP PIC is incompatible with Thumb"); ++ flag_pic = 0; ++ } ++ ++ /* We only support -mslow-flash-data on armv7-m targets. 
*/ ++ if (target_slow_flash_data ++ && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) ++ || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON))) ++ error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); ++} ++ ++/* Recompute the global settings depending on target attribute options. */ ++ ++static void ++arm_option_params_internal (void) ++{ ++ /* If we are not using the default (ARM mode) section anchor offset ++ ranges, then set the correct ranges now. */ ++ if (TARGET_THUMB1) ++ { ++ /* Thumb-1 LDR instructions cannot have negative offsets. ++ Permissible positive offset ranges are 5-bit (for byte loads), ++ 6-bit (for halfword loads), or 7-bit (for word loads). ++ Empirical results suggest a 7-bit anchor range gives the best ++ overall code size. */ ++ targetm.min_anchor_offset = 0; ++ targetm.max_anchor_offset = 127; ++ } ++ else if (TARGET_THUMB2) ++ { ++ /* The minimum is set such that the total size of the block ++ for a particular anchor is 248 + 1 + 4095 bytes, which is ++ divisible by eight, ensuring natural spacing of anchors. */ ++ targetm.min_anchor_offset = -248; ++ targetm.max_anchor_offset = 4095; ++ } ++ else ++ { ++ targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET; ++ targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET; ++ } ++ ++ if (optimize_size) ++ { ++ /* If optimizing for size, bump the number of instructions that we ++ are prepared to conditionally execute (even on a StrongARM). */ ++ max_insns_skipped = 6; ++ ++ /* For THUMB2, we limit the conditional sequence to one IT block. */ ++ if (TARGET_THUMB2) ++ max_insns_skipped = arm_restrict_it ? 1 : 4; ++ } ++ else ++ /* When -mrestrict-it is in use tone down the if-conversion. */ ++ max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it) ++ ? 1 : current_tune->max_insns_skipped; ++} ++ ++/* True if -mflip-thumb should next add an attribute for the default ++ mode, false if it should next add an attribute for the opposite mode. */ ++static GTY(()) bool thumb_flipper; ++ ++/* Options after initial target override. */ ++static GTY(()) tree init_optimize; ++ ++/* Reset options between modes that the user has specified. */ ++static void ++arm_option_override_internal (struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB)) ++ { ++ warning (0, "target CPU does not support THUMB instructions"); ++ opts->x_target_flags &= ~MASK_THUMB; ++ } ++ ++ if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags)) ++ { ++ /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ ++ opts->x_target_flags &= ~MASK_APCS_FRAME; ++ } ++ ++ /* Callee super interworking implies thumb interworking. Adding ++ this to the flags here simplifies the logic elsewhere. */ ++ if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING) ++ opts->x_target_flags |= MASK_INTERWORK; ++ ++ /* need to remember initial values so combinaisons of options like ++ -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */ ++ cl_optimization *to = TREE_OPTIMIZATION (init_optimize); ++ ++ if (! opts_set->x_arm_restrict_it) ++ opts->x_arm_restrict_it = arm_arch8; ++ ++ if (!TARGET_THUMB2_P (opts->x_target_flags)) ++ opts->x_arm_restrict_it = 0; ++ ++ /* Don't warn since it's on by default in -O2. 
*/ ++ if (TARGET_THUMB1_P (opts->x_target_flags)) ++ opts->x_flag_schedule_insns = 0; ++ else ++ opts->x_flag_schedule_insns = to->x_flag_schedule_insns; ++ ++ /* Disable shrink-wrap when optimizing function for size, since it tends to ++ generate additional returns. */ ++ if (optimize_function_for_size_p (cfun) ++ && TARGET_THUMB2_P (opts->x_target_flags)) ++ opts->x_flag_shrink_wrap = false; ++ else ++ opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap; ++ ++ /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn ++ - epilogue_insns - does not accurately model the corresponding insns ++ emitted in the asm file. In particular, see the comment in thumb_exit ++ 'Find out how many of the (return) argument registers we can corrupt'. ++ As a consequence, the epilogue may clobber registers without fipa-ra ++ finding out about it. Therefore, disable fipa-ra in Thumb1 mode. ++ TODO: Accurately model clobbers for epilogue_insns and reenable ++ fipa-ra. */ ++ if (TARGET_THUMB1_P (opts->x_target_flags)) ++ opts->x_flag_ipa_ra = 0; ++ else ++ opts->x_flag_ipa_ra = to->x_flag_ipa_ra; ++ ++ /* Thumb2 inline assembly code should always use unified syntax. ++ This will apply to ARM and Thumb1 eventually. */ ++ opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags); ++} ++ + /* Fix up any incompatible options that the user has specified. */ + static void + arm_option_override (void) +@@ -2772,10 +3034,9 @@ arm_option_override (void) + tune_flags = arm_selected_tune->flags; + current_tune = arm_selected_tune->tune; + +- /* Make sure that the processor choice does not conflict with any of the +- other command line choices. */ +- if (TARGET_ARM && !(insn_flags & FL_NOTM)) +- error ("target CPU does not support ARM mode"); ++ /* TBD: Dwarf info for apcs frame is not handled yet. */ ++ if (TARGET_APCS_FRAME) ++ flag_shrink_wrap = false; + + /* BPABI targets use linker tricks to allow interworking on cores + without thumb support. */ +@@ -2785,31 +3046,6 @@ arm_option_override (void) + target_flags &= ~MASK_INTERWORK; + } + +- if (TARGET_THUMB && !(insn_flags & FL_THUMB)) +- { +- warning (0, "target CPU does not support THUMB instructions"); +- target_flags &= ~MASK_THUMB; +- } +- +- if (TARGET_APCS_FRAME && TARGET_THUMB) +- { +- /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ +- target_flags &= ~MASK_APCS_FRAME; +- } +- +- /* Callee super interworking implies thumb interworking. Adding +- this to the flags here simplifies the logic elsewhere. */ +- if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) +- target_flags |= MASK_INTERWORK; +- +- /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done +- from here where no function is being compiled currently. */ +- if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) +- warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); +- +- if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) +- warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); +- + if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) + { + warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); +@@ -2825,14 +3061,6 @@ arm_option_override (void) + if (TARGET_APCS_REENT) + warning (0, "APCS reentrant code not supported. Ignored"); + +- /* If this target is normally configured to use APCS frames, warn if they +- are turned off and debugging is turned on. 
*/ +- if (TARGET_ARM +- && write_symbols != NO_DEBUG +- && !TARGET_APCS_FRAME +- && (TARGET_DEFAULT & MASK_APCS_FRAME)) +- warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); +- + if (TARGET_APCS_FLOAT) + warning (0, "passing floating point arguments in fp regs not yet supported"); + +@@ -2844,6 +3072,7 @@ arm_option_override (void) + arm_arch5e = (insn_flags & FL_ARCH5E) != 0; + arm_arch6 = (insn_flags & FL_ARCH6) != 0; + arm_arch6k = (insn_flags & FL_ARCH6K) != 0; ++ arm_arch6kz = arm_arch6k && (insn_flags & FL_ARCH6KZ); + arm_arch_notm = (insn_flags & FL_NOTM) != 0; + arm_arch6m = arm_arch6 && !arm_arch_notm; + arm_arch7 = (insn_flags & FL_ARCH7) != 0; +@@ -2854,8 +3083,6 @@ arm_option_override (void) + + arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; + arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; +- thumb_code = TARGET_ARM == 0; +- thumb1_code = TARGET_THUMB1 != 0; + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; +@@ -2866,32 +3093,6 @@ arm_option_override (void) + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + arm_arch_crc = (insn_flags & FL_CRC32) != 0; + arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0; +- if (arm_restrict_it == 2) +- arm_restrict_it = arm_arch8 && TARGET_THUMB2; +- +- if (!TARGET_THUMB2) +- arm_restrict_it = 0; +- +- /* If we are not using the default (ARM mode) section anchor offset +- ranges, then set the correct ranges now. */ +- if (TARGET_THUMB1) +- { +- /* Thumb-1 LDR instructions cannot have negative offsets. +- Permissible positive offset ranges are 5-bit (for byte loads), +- 6-bit (for halfword loads), or 7-bit (for word loads). +- Empirical results suggest a 7-bit anchor range gives the best +- overall code size. */ +- targetm.min_anchor_offset = 0; +- targetm.max_anchor_offset = 127; +- } +- else if (TARGET_THUMB2) +- { +- /* The minimum is set such that the total size of the block +- for a particular anchor is 248 + 1 + 4095 bytes, which is +- divisible by eight, ensuring natural spacing of anchors. */ +- targetm.min_anchor_offset = -248; +- targetm.max_anchor_offset = 4095; +- } + + /* V5 code we generate is completely interworking capable, so we turn off + TARGET_INTERWORK here to avoid many tests later on. */ +@@ -2951,10 +3152,6 @@ arm_option_override (void) + if (TARGET_IWMMXT && TARGET_NEON) + error ("iWMMXt and NEON are incompatible"); + +- /* iWMMXt unsupported under Thumb mode. */ +- if (TARGET_THUMB && TARGET_IWMMXT) +- error ("iWMMXt unsupported under Thumb mode"); +- + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); +@@ -2999,9 +3196,6 @@ arm_option_override (void) + target_thread_pointer = TP_SOFT; + } + +- if (TARGET_HARD_TP && TARGET_THUMB1) +- error ("can not use -mtp=cp15 with 16-bit Thumb"); +- + /* Override the default structure alignment for AAPCS ABI. */ + if (!global_options_set.x_arm_structure_size_boundary) + { +@@ -3024,12 +3218,6 @@ arm_option_override (void) + } + } + +- if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic) +- { +- error ("RTP PIC is incompatible with Thumb"); +- flag_pic = 0; +- } +- + /* If stack checking is disabled, we can use r10 as the PIC register, + which keeps r9 available. The EABI specifies r9 as the PIC register. 
*/ + if (flag_pic && TARGET_SINGLE_PIC_BASE) +@@ -3097,25 +3285,6 @@ arm_option_override (void) + unaligned_access = 0; + } + +- if (TARGET_THUMB1 && flag_schedule_insns) +- { +- /* Don't warn since it's on by default in -O2. */ +- flag_schedule_insns = 0; +- } +- +- if (optimize_size) +- { +- /* If optimizing for size, bump the number of instructions that we +- are prepared to conditionally execute (even on a StrongARM). */ +- max_insns_skipped = 6; +- +- /* For THUMB2, we limit the conditional sequence to one IT block. */ +- if (TARGET_THUMB2) +- max_insns_skipped = MAX_INSN_PER_IT_BLOCK; +- } +- else +- max_insns_skipped = current_tune->max_insns_skipped; +- + /* Hot/Cold partitioning is not currently supported, since we can't + handle literal pool placement in that case. */ + if (flag_reorder_blocks_and_partition) +@@ -3140,31 +3309,33 @@ arm_option_override (void) + && abi_version_at_least(2)) + flag_strict_volatile_bitfields = 1; + +- /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed +- it beneficial (signified by setting num_prefetch_slots to 1 or more.) */ ++ /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we ++ have deemed it beneficial (signified by setting ++ prefetch.num_slots to 1 or more). */ + if (flag_prefetch_loop_arrays < 0 + && HAVE_prefetch + && optimize >= 3 +- && current_tune->num_prefetch_slots > 0) ++ && current_tune->prefetch.num_slots > 0) + flag_prefetch_loop_arrays = 1; + +- /* Set up parameters to be used in prefetching algorithm. Do not override the +- defaults unless we are tuning for a core we have researched values for. */ +- if (current_tune->num_prefetch_slots > 0) ++ /* Set up parameters to be used in prefetching algorithm. Do not ++ override the defaults unless we are tuning for a core we have ++ researched values for. */ ++ if (current_tune->prefetch.num_slots > 0) + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, +- current_tune->num_prefetch_slots, +- global_options.x_param_values, +- global_options_set.x_param_values); +- if (current_tune->l1_cache_line_size >= 0) ++ current_tune->prefetch.num_slots, ++ global_options.x_param_values, ++ global_options_set.x_param_values); ++ if (current_tune->prefetch.l1_cache_line_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, +- current_tune->l1_cache_line_size, +- global_options.x_param_values, +- global_options_set.x_param_values); +- if (current_tune->l1_cache_size >= 0) ++ current_tune->prefetch.l1_cache_line_size, ++ global_options.x_param_values, ++ global_options_set.x_param_values); ++ if (current_tune->prefetch.l1_cache_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, +- current_tune->l1_cache_size, +- global_options.x_param_values, +- global_options_set.x_param_values); ++ current_tune->prefetch.l1_cache_size, ++ global_options.x_param_values, ++ global_options_set.x_param_values); + + /* Use Neon to perform 64-bits operations rather than core + registers. */ +@@ -3174,67 +3345,63 @@ arm_option_override (void) + + /* Use the alternative scheduling-pressure algorithm by default. */ + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, +- global_options.x_param_values, +- global_options_set.x_param_values); ++ global_options.x_param_values, ++ global_options_set.x_param_values); + + /* Look through ready list and all of queue for instructions + relevant for L2 auto-prefetcher. 
*/ + int param_sched_autopref_queue_depth; +- if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF) +- param_sched_autopref_queue_depth = -1; +- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK) +- param_sched_autopref_queue_depth = 0; +- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL) +- param_sched_autopref_queue_depth = max_insn_queue_index + 1; +- else +- gcc_unreachable (); +- maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, +- param_sched_autopref_queue_depth, +- global_options.x_param_values, +- global_options_set.x_param_values); + +- /* Disable shrink-wrap when optimizing function for size, since it tends to +- generate additional returns. */ +- if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) +- flag_shrink_wrap = false; +- /* TBD: Dwarf info for apcs frame is not handled yet. */ +- if (TARGET_APCS_FRAME) +- flag_shrink_wrap = false; ++ switch (current_tune->sched_autopref) ++ { ++ case tune_params::SCHED_AUTOPREF_OFF: ++ param_sched_autopref_queue_depth = -1; ++ break; ++ ++ case tune_params::SCHED_AUTOPREF_RANK: ++ param_sched_autopref_queue_depth = 0; ++ break; ++ ++ case tune_params::SCHED_AUTOPREF_FULL: ++ param_sched_autopref_queue_depth = max_insn_queue_index + 1; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } + +- /* We only support -mslow-flash-data on armv7-m targets. */ +- if (target_slow_flash_data +- && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) +- || (TARGET_THUMB1 || flag_pic || TARGET_NEON))) +- error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); ++ maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, ++ param_sched_autopref_queue_depth, ++ global_options.x_param_values, ++ global_options_set.x_param_values); + + /* Currently, for slow flash data, we just disable literal pools. */ + if (target_slow_flash_data) + arm_disable_literal_pool = true; + +- /* Thumb2 inline assembly code should always use unified syntax. +- This will apply to ARM and Thumb1 eventually. */ +- if (TARGET_THUMB2) +- inline_asm_unified = 1; +- + /* Disable scheduling fusion by default if it's not armv7 processor + or doesn't prefer ldrd/strd. */ + if (flag_schedule_fusion == 2 + && (!arm_arch7 || !current_tune->prefer_ldrd_strd)) + flag_schedule_fusion = 0; + +- /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn +- - epilogue_insns - does not accurately model the corresponding insns +- emitted in the asm file. In particular, see the comment in thumb_exit +- 'Find out how many of the (return) argument registers we can corrupt'. +- As a consequence, the epilogue may clobber registers without fipa-ra +- finding out about it. Therefore, disable fipa-ra in Thumb1 mode. +- TODO: Accurately model clobbers for epilogue_insns and reenable +- fipa-ra. */ +- if (TARGET_THUMB1) +- flag_ipa_ra = 0; ++ /* Need to remember initial options before they are overriden. */ ++ init_optimize = build_optimization_node (&global_options); ++ ++ arm_option_override_internal (&global_options, &global_options_set); ++ arm_option_check_internal (&global_options); ++ arm_option_params_internal (); + + /* Register global variables with the garbage collector. */ + arm_add_gc_roots (); ++ ++ /* Save the initial options in case the user does function specific ++ options. */ ++ target_option_default_node = target_option_current_node ++ = build_target_option_node (&global_options); ++ ++ /* Init initial mode for testing. 
*/ ++ thumb_flipper = TARGET_THUMB; + } + + static void +@@ -3388,13 +3555,20 @@ arm_warn_func_return (tree decl) + static void + arm_asm_trampoline_template (FILE *f) + { ++ if (TARGET_UNIFIED_ASM) ++ fprintf (f, "\t.syntax unified\n"); ++ else ++ fprintf (f, "\t.syntax divided\n"); ++ + if (TARGET_ARM) + { ++ fprintf (f, "\t.arm\n"); + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); + } + else if (TARGET_THUMB2) + { ++ fprintf (f, "\t.thumb\n"); + /* The Thumb-2 trampoline is similar to the arm implementation. + Unlike 16-bit Thumb, we enter the stub in thumb mode. */ + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", +@@ -7946,236 +8120,6 @@ thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode) + return x; + } + +-bool +-arm_legitimize_reload_address (rtx *p, +- machine_mode mode, +- int opnum, int type, +- int ind_levels ATTRIBUTE_UNUSED) +-{ +- /* We must recognize output that we have already generated ourselves. */ +- if (GET_CODE (*p) == PLUS +- && GET_CODE (XEXP (*p, 0)) == PLUS +- && REG_P (XEXP (XEXP (*p, 0), 0)) +- && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) +- && CONST_INT_P (XEXP (*p, 1))) +- { +- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, +- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), +- VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return true; +- } +- +- if (GET_CODE (*p) == PLUS +- && REG_P (XEXP (*p, 0)) +- && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) +- /* If the base register is equivalent to a constant, let the generic +- code handle it. Otherwise we will run into problems if a future +- reload pass decides to rematerialize the constant. */ +- && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0))) +- && CONST_INT_P (XEXP (*p, 1))) +- { +- HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); +- HOST_WIDE_INT low, high; +- +- /* Detect coprocessor load/stores. */ +- bool coproc_p = ((TARGET_HARD_FLOAT +- && TARGET_VFP +- && (mode == SFmode || mode == DFmode)) +- || (TARGET_REALLY_IWMMXT +- && VALID_IWMMXT_REG_MODE (mode)) +- || (TARGET_NEON +- && (VALID_NEON_DREG_MODE (mode) +- || VALID_NEON_QREG_MODE (mode)))); +- +- /* For some conditions, bail out when lower two bits are unaligned. */ +- if ((val & 0x3) != 0 +- /* Coprocessor load/store indexes are 8-bits + '00' appended. */ +- && (coproc_p +- /* For DI, and DF under soft-float: */ +- || ((mode == DImode || mode == DFmode) +- /* Without ldrd, we use stm/ldm, which does not +- fair well with unaligned bits. */ +- && (! TARGET_LDRD +- /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ +- || TARGET_THUMB2)))) +- return false; +- +- /* When breaking down a [reg+index] reload address into [(reg+high)+low], +- of which the (reg+high) gets turned into a reload add insn, +- we try to decompose the index into high/low values that can often +- also lead to better reload CSE. +- For example: +- ldr r0, [r2, #4100] // Offset too large +- ldr r1, [r2, #4104] // Offset too large +- +- is best reloaded as: +- add t1, r2, #4096 +- ldr r0, [t1, #4] +- add t2, r2, #4096 +- ldr r1, [t2, #8] +- +- which post-reload CSE can simplify in most cases to eliminate the +- second add instruction: +- add t1, r2, #4096 +- ldr r0, [t1, #4] +- ldr r1, [t1, #8] +- +- The idea here is that we want to split out the bits of the constant +- as a mask, rather than as subtracting the maximum offset that the +- respective type of load/store used can handle. 
+- +- When encountering negative offsets, we can still utilize it even if +- the overall offset is positive; sometimes this may lead to an immediate +- that can be constructed with fewer instructions. +- For example: +- ldr r0, [r2, #0x3FFFFC] +- +- This is best reloaded as: +- add t1, r2, #0x400000 +- ldr r0, [t1, #-4] +- +- The trick for spotting this for a load insn with N bits of offset +- (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a +- negative offset that is going to make bit N and all the bits below +- it become zero in the remainder part. +- +- The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect +- to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), +- used in most cases of ARM load/store instructions. */ +- +-#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ +- (((VAL) & ((1 << (N)) - 1)) \ +- ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ +- : 0) +- +- if (coproc_p) +- { +- low = SIGN_MAG_LOW_ADDR_BITS (val, 10); +- +- /* NEON quad-word load/stores are made of two double-word accesses, +- so the valid index range is reduced by 8. Treat as 9-bit range if +- we go over it. */ +- if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) +- low = SIGN_MAG_LOW_ADDR_BITS (val, 9); +- } +- else if (GET_MODE_SIZE (mode) == 8) +- { +- if (TARGET_LDRD) +- low = (TARGET_THUMB2 +- ? SIGN_MAG_LOW_ADDR_BITS (val, 10) +- : SIGN_MAG_LOW_ADDR_BITS (val, 8)); +- else +- /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) +- to access doublewords. The supported load/store offsets are +- -8, -4, and 4, which we try to produce here. */ +- low = ((val & 0xf) ^ 0x8) - 0x8; +- } +- else if (GET_MODE_SIZE (mode) < 8) +- { +- /* NEON element load/stores do not have an offset. */ +- if (TARGET_NEON_FP16 && mode == HFmode) +- return false; +- +- if (TARGET_THUMB2) +- { +- /* Thumb-2 has an asymmetrical index range of (-256,4096). +- Try the wider 12-bit range first, and re-try if the result +- is out of range. */ +- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); +- if (low < -255) +- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); +- } +- else +- { +- if (mode == HImode || mode == HFmode) +- { +- if (arm_arch4) +- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); +- else +- { +- /* The storehi/movhi_bytes fallbacks can use only +- [-4094,+4094] of the full ldrb/strb index range. */ +- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); +- if (low == 4095 || low == -4095) +- return false; +- } +- } +- else +- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); +- } +- } +- else +- return false; +- +- high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) +- ^ (unsigned HOST_WIDE_INT) 0x80000000) +- - (unsigned HOST_WIDE_INT) 0x80000000); +- /* Check for overflow or zero */ +- if (low == 0 || high == 0 || (high + low != val)) +- return false; +- +- /* Reload the high part into a base reg; leave the low part +- in the mem. +- Note that replacing this gen_rtx_PLUS with plus_constant is +- wrong in this case because we rely on the +- (plus (plus reg c1) c2) structure being preserved so that +- XEXP (*p, 0) in push_reload below uses the correct term. 
*/ +- *p = gen_rtx_PLUS (GET_MODE (*p), +- gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), +- GEN_INT (high)), +- GEN_INT (low)); +- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, +- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), +- VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return true; +- } +- +- return false; +-} +- +-rtx +-thumb_legitimize_reload_address (rtx *x_p, +- machine_mode mode, +- int opnum, int type, +- int ind_levels ATTRIBUTE_UNUSED) +-{ +- rtx x = *x_p; +- +- if (GET_CODE (x) == PLUS +- && GET_MODE_SIZE (mode) < 4 +- && REG_P (XEXP (x, 0)) +- && XEXP (x, 0) == stack_pointer_rtx +- && CONST_INT_P (XEXP (x, 1)) +- && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) +- { +- rtx orig_x = x; +- +- x = copy_rtx (x); +- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), +- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return x; +- } +- +- /* If both registers are hi-regs, then it's better to reload the +- entire expression rather than each register individually. That +- only requires one reload register rather than two. */ +- if (GET_CODE (x) == PLUS +- && REG_P (XEXP (x, 0)) +- && REG_P (XEXP (x, 1)) +- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) +- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) +- { +- rtx orig_x = x; +- +- x = copy_rtx (x); +- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), +- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); +- return x; +- } +- +- return NULL; +-} +- + /* Return TRUE if X contains any TLS symbol references. */ + + bool +@@ -9399,7 +9343,8 @@ static bool + arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) + { + const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; +- gcc_assert (GET_CODE (x) == UNSPEC); ++ rtx_code code = GET_CODE (x); ++ gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE); + + switch (XINT (x, 1)) + { +@@ -9445,7 +9390,7 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) + *cost = COSTS_N_INSNS (2); + break; + } +- return false; ++ return true; + } + + /* Cost of a libcall. We assume one insn per argument, an amount for the +@@ -11008,6 +10953,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + *cost = LIBCALL_COST (1); + return false; + ++ case UNSPEC_VOLATILE: + case UNSPEC: + return arm_unspec_cost (x, outer_code, speed_p, cost); + +@@ -12908,12 +12854,12 @@ neon_expand_vector_init (rtx target, rtx vals) + } + + /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise +- ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so +- reported source locations are bogus. */ ++ ERR if it doesn't. EXP indicates the source location, which includes the ++ inlining history for intrinsics. */ + + static void + bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, +- const char *err) ++ const_tree exp, const char *desc) + { + HOST_WIDE_INT lane; + +@@ -12922,15 +12868,22 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, + lane = INTVAL (operand); + + if (lane < low || lane >= high) +- error (err); ++ { ++ if (exp) ++ error ("%K%s %lld out of range %lld - %lld", ++ exp, desc, lane, low, high - 1); ++ else ++ error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1); ++ } + } + + /* Bounds-check lanes. 
*/ + + void +-neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) ++neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, ++ const_tree exp) + { +- bounds_check (operand, low, high, "lane out of range"); ++ bounds_check (operand, low, high, exp, "lane"); + } + + /* Bounds-check constants. */ +@@ -12938,7 +12891,7 @@ neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) + void + neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) + { +- bounds_check (operand, low, high, "constant out of range"); ++ bounds_check (operand, low, high, NULL_TREE, "constant"); + } + + HOST_WIDE_INT +@@ -17287,14 +17240,16 @@ thumb2_reorg (void) + + FOR_EACH_BB_FN (bb, cfun) + { +- if (current_tune->disparage_flag_setting_t16_encodings ++ if ((current_tune->disparage_flag_setting_t16_encodings ++ == tune_params::DISPARAGE_FLAGS_ALL) + && optimize_bb_for_speed_p (bb)) + continue; + + rtx_insn *insn; + Convert_Action action = SKIP; + Convert_Action action_for_partial_flag_setting +- = (current_tune->disparage_partial_flag_setting_t16_encodings ++ = ((current_tune->disparage_flag_setting_t16_encodings ++ != tune_params::DISPARAGE_FLAGS_NEITHER) + && optimize_bb_for_speed_p (bb)) + ? SKIP : CONV; + +@@ -17699,7 +17654,7 @@ arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, + } + + conditional = reverse ? "%?%D0" : "%?%d0"; +- if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM) ++ if ((regno_base == SP_REGNUM) && TARGET_THUMB) + { + /* Output pop (not stmfd) because it has a shorter encoding. */ + gcc_assert (update); +@@ -17998,19 +17953,27 @@ output_mov_long_double_arm_from_arm (rtx *operands) + void + arm_emit_movpair (rtx dest, rtx src) + { ++ rtx insn; ++ + /* If the src is an immediate, simplify it. */ + if (CONST_INT_P (src)) + { + HOST_WIDE_INT val = INTVAL (src); + emit_set_insn (dest, GEN_INT (val & 0x0000ffff)); + if ((val >> 16) & 0x0000ffff) +- emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), +- GEN_INT (16)), +- GEN_INT ((val >> 16) & 0x0000ffff)); ++ { ++ emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), ++ GEN_INT (16)), ++ GEN_INT ((val >> 16) & 0x0000ffff)); ++ insn = get_last_insn (); ++ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); ++ } + return; + } + emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); ++ insn = get_last_insn (); ++ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); + } + + /* Output a move between double words. It must be REG<-MEM +@@ -24077,19 +24040,19 @@ thumb_far_jump_used_p (void) + } + + /* Return nonzero if FUNC must be entered in ARM mode. */ +-int ++static bool + is_called_in_ARM_mode (tree func) + { + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + /* Ignore the problem about functions whose address is taken. */ + if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func)) +- return TRUE; ++ return true; + + #ifdef ARM_PE + return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE; + #else +- return FALSE; ++ return false; + #endif + } + +@@ -24375,6 +24338,24 @@ arm_init_expanders (void) + mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY); + } + ++/* Check that FUNC is called with a different mode. 
*/ ++ ++bool ++arm_change_mode_p (tree func) ++{ ++ if (TREE_CODE (func) != FUNCTION_DECL) ++ return false; ++ ++ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func); ++ ++ if (!callee_tree) ++ callee_tree = target_option_default_node; ++ ++ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); ++ int flags = callee_opts->x_target_flags; ++ ++ return (TARGET_THUMB_P (flags) != TARGET_THUMB); ++} + + /* Like arm_compute_initial_elimination offset. Simpler because there + isn't an ABI specified frame pointer for Thumb. Instead, we set it +@@ -25660,12 +25641,12 @@ arm_print_tune_info (void) + current_tune->constant_limit); + asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n", + current_tune->max_insns_skipped); +- asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n", +- current_tune->num_prefetch_slots); +- asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n", +- current_tune->l1_cache_size); +- asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n", +- current_tune->l1_cache_line_size); ++ asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n", ++ current_tune->prefetch.num_slots); ++ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n", ++ current_tune->prefetch.l1_cache_size); ++ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n", ++ current_tune->prefetch.l1_cache_line_size); + asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n", + (int) current_tune->prefer_constant_pool); + asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n"); +@@ -25681,23 +25662,19 @@ arm_print_tune_info (void) + asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n", + (int) current_tune->prefer_ldrd_strd); + asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n", +- (int) current_tune->logical_op_non_short_circuit[0], +- (int) current_tune->logical_op_non_short_circuit[1]); ++ (int) current_tune->logical_op_non_short_circuit_thumb, ++ (int) current_tune->logical_op_non_short_circuit_arm); + asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n", + (int) current_tune->prefer_neon_for_64bits); + asm_fprintf (asm_out_file, + "\t\t@disparage_flag_setting_t16_encodings:\t%d\n", + (int) current_tune->disparage_flag_setting_t16_encodings); +- asm_fprintf (asm_out_file, +- "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n", +- (int) current_tune +- ->disparage_partial_flag_setting_t16_encodings); + asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n", + (int) current_tune->string_ops_prefer_neon); + asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n", + current_tune->max_insns_inline_memset); +- asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n", +- current_tune->fuseable_ops); ++ asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n", ++ current_tune->fusible_ops); + asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n", + (int) current_tune->sched_autopref); + } +@@ -25707,9 +25684,6 @@ arm_file_start (void) + { + int val; + +- if (TARGET_UNIFIED_ASM) +- asm_fprintf (asm_out_file, "\t.syntax unified\n"); +- + if (TARGET_BPABI) + { + const char *fpu_name; +@@ -26509,7 +26483,7 @@ arm_dbx_register_number (unsigned int regno) + if (IS_IWMMXT_REGNUM (regno)) + return 112 + regno - FIRST_IWMMXT_REGNUM; + +- gcc_unreachable (); ++ return DWARF_FRAME_REGISTERS; + } + + /* Dwarf models VFPv3 registers as 32 64-bit registers. 
+@@ -27213,40 +27187,12 @@ thumb2_output_casesi (rtx *operands) + } + } + +-/* Most ARM cores are single issue, but some newer ones can dual issue. +- The scheduler descriptions rely on this being correct. */ ++/* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the ++ per-core tuning structs. */ + static int + arm_issue_rate (void) + { +- switch (arm_tune) +- { +- case xgene1: +- return 4; +- +- case cortexa15: +- case cortexa57: +- case exynosm1: +- return 3; +- +- case cortexm7: +- case cortexr4: +- case cortexr4f: +- case cortexr5: +- case genericv7a: +- case cortexa5: +- case cortexa7: +- case cortexa8: +- case cortexa9: +- case cortexa12: +- case cortexa17: +- case cortexa53: +- case fa726te: +- case marvell_pj4: +- return 2; +- +- default: +- return 1; +- } ++ return current_tune->issue_rate; + } + + /* Return how many instructions should scheduler lookahead to choose the +@@ -29411,7 +29357,7 @@ arm_gen_setmem (rtx *operands) + static bool + arm_macro_fusion_p (void) + { +- return current_tune->fuseable_ops != ARM_FUSE_NOTHING; ++ return current_tune->fusible_ops != tune_params::FUSE_NOTHING; + } + + +@@ -29432,44 +29378,44 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) + if (!arm_macro_fusion_p ()) + return false; + +- if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT) ++ if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT) + { + /* We are trying to fuse +- movw imm / movt imm +- instructions as a group that gets scheduled together. */ ++ movw imm / movt imm ++ instructions as a group that gets scheduled together. */ + + set_dest = SET_DEST (curr_set); + + if (GET_MODE (set_dest) != SImode) +- return false; ++ return false; + + /* We are trying to match: +- prev (movw) == (set (reg r0) (const_int imm16)) +- curr (movt) == (set (zero_extract (reg r0) +- (const_int 16) +- (const_int 16)) +- (const_int imm16_1)) +- or +- prev (movw) == (set (reg r1) +- (high (symbol_ref ("SYM")))) +- curr (movt) == (set (reg r0) +- (lo_sum (reg r1) +- (symbol_ref ("SYM")))) */ ++ prev (movw) == (set (reg r0) (const_int imm16)) ++ curr (movt) == (set (zero_extract (reg r0) ++ (const_int 16) ++ (const_int 16)) ++ (const_int imm16_1)) ++ or ++ prev (movw) == (set (reg r1) ++ (high (symbol_ref ("SYM")))) ++ curr (movt) == (set (reg r0) ++ (lo_sum (reg r1) ++ (symbol_ref ("SYM")))) */ + if (GET_CODE (set_dest) == ZERO_EXTRACT) +- { +- if (CONST_INT_P (SET_SRC (curr_set)) +- && CONST_INT_P (SET_SRC (prev_set)) +- && REG_P (XEXP (set_dest, 0)) +- && REG_P (SET_DEST (prev_set)) +- && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) +- return true; +- } ++ { ++ if (CONST_INT_P (SET_SRC (curr_set)) ++ && CONST_INT_P (SET_SRC (prev_set)) ++ && REG_P (XEXP (set_dest, 0)) ++ && REG_P (SET_DEST (prev_set)) ++ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) ++ return true; ++ } + else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM +- && REG_P (SET_DEST (curr_set)) +- && REG_P (SET_DEST (prev_set)) +- && GET_CODE (SET_SRC (prev_set)) == HIGH +- && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) +- return true; ++ && REG_P (SET_DEST (curr_set)) ++ && REG_P (SET_DEST (prev_set)) ++ && GET_CODE (SET_SRC (prev_set)) == HIGH ++ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) ++ return true; + } + return false; + } +@@ -29533,6 +29479,273 @@ arm_is_constant_pool_ref (rtx x) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))); + } + ++/* Remember the last target of arm_set_current_function. 
*/ ++static GTY(()) tree arm_previous_fndecl; ++ ++/* Invalidate arm_previous_fndecl. */ ++void ++arm_reset_previous_fndecl (void) ++{ ++ arm_previous_fndecl = NULL_TREE; ++} ++ ++/* Establish appropriate back-end context for processing the function ++ FNDECL. The argument might be NULL to indicate processing at top ++ level, outside of any function scope. */ ++static void ++arm_set_current_function (tree fndecl) ++{ ++ if (!fndecl || fndecl == arm_previous_fndecl) ++ return; ++ ++ tree old_tree = (arm_previous_fndecl ++ ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl) ++ : NULL_TREE); ++ ++ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ ++ arm_previous_fndecl = fndecl; ++ if (old_tree == new_tree) ++ return; ++ ++ if (new_tree && new_tree != target_option_default_node) ++ { ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (new_tree)); ++ ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else ++ TREE_TARGET_GLOBALS (new_tree) ++ = save_target_globals_default_opts (); ++ } ++ ++ else if (old_tree && old_tree != target_option_default_node) ++ { ++ new_tree = target_option_current_node; ++ ++ cl_target_option_restore (&global_options, ++ TREE_TARGET_OPTION (new_tree)); ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else if (new_tree == target_option_default_node) ++ restore_target_globals (&default_target_globals); ++ else ++ TREE_TARGET_GLOBALS (new_tree) ++ = save_target_globals_default_opts (); ++ } ++ ++ arm_option_params_internal (); ++} ++ ++/* Implement TARGET_OPTION_PRINT. */ + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane) ++static void ++arm_option_print (FILE *file, int indent, struct cl_target_option *ptr) +{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); ++ int flags = ptr->x_target_flags; ++ ++ fprintf (file, "%*sselected arch %s\n", indent, "", ++ TARGET_THUMB2_P (flags) ? "thumb2" : ++ TARGET_THUMB_P (flags) ? "thumb1" : ++ "arm"); +} + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane) ++/* Hook to determine if one function can safely inline another. */ ++ ++static bool ++arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED) +{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); ++ /* Overidde default hook: Always OK to inline between different modes. ++ Function with mode specific instructions, e.g using asm, must be explicitely ++ protected with noinline. */ ++ return true; +} + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane) ++/* Inner function to process the attribute((target(...))), take an argument and ++ set the current options from the argument. If we have a list, recursively ++ go over the list. 
*/ ++ ++static bool ++arm_valid_target_attribute_rec (tree args, struct gcc_options *opts) +{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); ++ if (TREE_CODE (args) == TREE_LIST) ++ { ++ bool ret = true; ++ for (; args; args = TREE_CHAIN (args)) ++ if (TREE_VALUE (args) ++ && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts)) ++ ret = false; ++ return ret; ++ } ++ ++ else if (TREE_CODE (args) != STRING_CST) ++ { ++ error ("attribute % argument not a string"); ++ return false; ++ } ++ ++ char *argstr = ASTRDUP (TREE_STRING_POINTER (args)); ++ while (argstr && *argstr != '\0') ++ { ++ while (ISSPACE (*argstr)) ++ argstr++; ++ ++ if (!strcmp (argstr, "thumb")) ++ { ++ opts->x_target_flags |= MASK_THUMB; ++ arm_option_check_internal (opts); ++ return true; ++ } ++ ++ if (!strcmp (argstr, "arm")) ++ { ++ opts->x_target_flags &= ~MASK_THUMB; ++ arm_option_check_internal (opts); ++ return true; ++ } ++ ++ warning (0, "attribute(target(\"%s\")) is unknown", argstr); ++ return false; ++ } ++ ++ return false; ++} ++ ++/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ ++ ++tree ++arm_valid_target_attribute_tree (tree args, struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ if (!arm_valid_target_attribute_rec (args, opts)) ++ return NULL_TREE; ++ ++ /* Do any overrides, such as global options arch=xxx. */ ++ arm_option_override_internal (opts, opts_set); ++ ++ return build_target_option_node (opts); ++} ++ ++static void ++add_attribute (const char * mode, tree *attributes) ++{ ++ size_t len = strlen (mode); ++ tree value = build_string (len, mode); ++ ++ TREE_TYPE (value) = build_array_type (char_type_node, ++ build_index_type (size_int (len))); ++ ++ *attributes = tree_cons (get_identifier ("target"), ++ build_tree_list (NULL_TREE, value), ++ *attributes); ++} ++ ++/* For testing. Insert thumb or arm modes alternatively on functions. */ ++ ++static void ++arm_insert_attributes (tree fndecl, tree * attributes) ++{ ++ const char *mode; ++ ++ if (! TARGET_FLIP_THUMB) ++ return; ++ ++ if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl) ++ || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl)) ++ return; ++ ++ /* Nested definitions must inherit mode. */ ++ if (current_function_decl) ++ { ++ mode = TARGET_THUMB ? "thumb" : "arm"; ++ add_attribute (mode, attributes); ++ return; ++ } ++ ++ /* If there is already a setting don't change it. */ ++ if (lookup_attribute ("target", *attributes) != NULL) ++ return; ++ ++ mode = thumb_flipper ? "thumb" : "arm"; ++ add_attribute (mode, attributes); ++ ++ thumb_flipper = !thumb_flipper; ++} ++ ++/* Hook to validate attribute((target("string"))). */ ++ ++static bool ++arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name), ++ tree args, int ARG_UNUSED (flags)) ++{ ++ bool ret = true; ++ struct gcc_options func_options; ++ tree cur_tree, new_optimize; ++ gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); ++ ++ /* Get the optimization options of the current function. */ ++ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); ++ ++ /* If the function changed the optimization levels as well as setting target ++ options, start with the optimizations specified. */ ++ if (!func_optimize) ++ func_optimize = optimization_default_node; ++ ++ /* Init func_options. */ ++ memset (&func_options, 0, sizeof (func_options)); ++ init_options_struct (&func_options, NULL); ++ lang_hooks.init_options_struct (&func_options); ++ ++ /* Initialize func_options to the defaults. 
*/ ++ cl_optimization_restore (&func_options, ++ TREE_OPTIMIZATION (func_optimize)); ++ ++ cl_target_option_restore (&func_options, ++ TREE_TARGET_OPTION (target_option_default_node)); ++ ++ /* Set func_options flags with new target mode. */ ++ cur_tree = arm_valid_target_attribute_tree (args, &func_options, ++ &global_options_set); ++ ++ if (cur_tree == NULL_TREE) ++ ret = false; ++ ++ new_optimize = build_optimization_node (&func_options); ++ ++ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree; ++ ++ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; ++ ++ return ret; ++} ++ ++void ++arm_declare_function_name (FILE *stream, const char *name, tree decl) ++{ ++ if (TARGET_UNIFIED_ASM) ++ fprintf (stream, "\t.syntax unified\n"); ++ else ++ fprintf (stream, "\t.syntax divided\n"); ++ ++ if (TARGET_THUMB) ++ { ++ if (is_called_in_ARM_mode (decl) ++ || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY ++ && cfun->is_thunk)) ++ fprintf (stream, "\t.code 32\n"); ++ else if (TARGET_THUMB1) ++ fprintf (stream, "\t.code\t16\n\t.thumb_func\n"); ++ else ++ fprintf (stream, "\t.thumb\n\t.thumb_func\n"); ++ } ++ else ++ fprintf (stream, "\t.arm\n"); ++ ++ if (TARGET_POKE_FUNCTION_NAME) ++ arm_poke_function_name (stream, (const char *) name); +} + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + /* If MEM is in the form of [base+offset], extract the two parts + of address and set to BASE and OFFSET, otherwise return false + after clearing BASE and OFFSET. */ +--- a/src/gcc/config/arm/arm.h ++++ b/src/gcc/config/arm/arm.h +@@ -45,132 +45,7 @@ + extern char arm_arch_name[]; + + /* Target CPU builtins. */ +-#define TARGET_CPU_CPP_BUILTINS() \ +- do \ +- { \ +- if (TARGET_DSP_MULTIPLY) \ +- builtin_define ("__ARM_FEATURE_DSP"); \ +- if (TARGET_ARM_QBIT) \ +- builtin_define ("__ARM_FEATURE_QBIT"); \ +- if (TARGET_ARM_SAT) \ +- builtin_define ("__ARM_FEATURE_SAT"); \ +- if (TARGET_CRYPTO) \ +- builtin_define ("__ARM_FEATURE_CRYPTO"); \ +- if (unaligned_access) \ +- builtin_define ("__ARM_FEATURE_UNALIGNED"); \ +- if (TARGET_CRC32) \ +- builtin_define ("__ARM_FEATURE_CRC32"); \ +- if (TARGET_32BIT) \ +- builtin_define ("__ARM_32BIT_STATE"); \ +- if (TARGET_ARM_FEATURE_LDREX) \ +- builtin_define_with_int_value ( \ +- "__ARM_FEATURE_LDREX", TARGET_ARM_FEATURE_LDREX); \ +- if ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) \ +- || TARGET_ARM_ARCH_ISA_THUMB >=2) \ +- builtin_define ("__ARM_FEATURE_CLZ"); \ +- if (TARGET_INT_SIMD) \ +- builtin_define ("__ARM_FEATURE_SIMD32"); \ +- \ +- builtin_define_with_int_value ( \ +- "__ARM_SIZEOF_MINIMAL_ENUM", \ +- flag_short_enums ? 1 : 4); \ +- builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \ +- wchar_type_node); \ +- if (TARGET_ARM_ARCH_PROFILE) \ +- builtin_define_with_int_value ( \ +- "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ +- \ +- /* Define __arm__ even when in thumb mode, for \ +- consistency with armcc. 
*/ \ +- builtin_define ("__arm__"); \ +- if (TARGET_ARM_ARCH) \ +- builtin_define_with_int_value ( \ +- "__ARM_ARCH", TARGET_ARM_ARCH); \ +- if (arm_arch_notm) \ +- builtin_define ("__ARM_ARCH_ISA_ARM"); \ +- builtin_define ("__APCS_32__"); \ +- if (TARGET_THUMB) \ +- builtin_define ("__thumb__"); \ +- if (TARGET_THUMB2) \ +- builtin_define ("__thumb2__"); \ +- if (TARGET_ARM_ARCH_ISA_THUMB) \ +- builtin_define_with_int_value ( \ +- "__ARM_ARCH_ISA_THUMB", \ +- TARGET_ARM_ARCH_ISA_THUMB); \ +- \ +- if (TARGET_BIG_END) \ +- { \ +- builtin_define ("__ARMEB__"); \ +- builtin_define ("__ARM_BIG_ENDIAN"); \ +- if (TARGET_THUMB) \ +- builtin_define ("__THUMBEB__"); \ +- } \ +- else \ +- { \ +- builtin_define ("__ARMEL__"); \ +- if (TARGET_THUMB) \ +- builtin_define ("__THUMBEL__"); \ +- } \ +- \ +- if (TARGET_SOFT_FLOAT) \ +- builtin_define ("__SOFTFP__"); \ +- \ +- if (TARGET_VFP) \ +- builtin_define ("__VFP_FP__"); \ +- \ +- if (TARGET_ARM_FP) \ +- builtin_define_with_int_value ( \ +- "__ARM_FP", TARGET_ARM_FP); \ +- if (arm_fp16_format == ARM_FP16_FORMAT_IEEE) \ +- builtin_define ("__ARM_FP16_FORMAT_IEEE"); \ +- if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) \ +- builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); \ +- if (TARGET_FMA) \ +- builtin_define ("__ARM_FEATURE_FMA"); \ +- \ +- if (TARGET_NEON) \ +- { \ +- builtin_define ("__ARM_NEON__"); \ +- builtin_define ("__ARM_NEON"); \ +- } \ +- if (TARGET_NEON_FP) \ +- builtin_define_with_int_value ( \ +- "__ARM_NEON_FP", TARGET_NEON_FP); \ +- \ +- /* Add a define for interworking. \ +- Needed when building libgcc.a. */ \ +- if (arm_cpp_interwork) \ +- builtin_define ("__THUMB_INTERWORK__"); \ +- \ +- builtin_assert ("cpu=arm"); \ +- builtin_assert ("machine=arm"); \ +- \ +- builtin_define (arm_arch_name); \ +- if (arm_arch_xscale) \ +- builtin_define ("__XSCALE__"); \ +- if (arm_arch_iwmmxt) \ +- { \ +- builtin_define ("__IWMMXT__"); \ +- builtin_define ("__ARM_WMMX"); \ +- } \ +- if (arm_arch_iwmmxt2) \ +- builtin_define ("__IWMMXT2__"); \ +- if (TARGET_AAPCS_BASED) \ +- { \ +- if (arm_pcs_default == ARM_PCS_AAPCS_VFP) \ +- builtin_define ("__ARM_PCS_VFP"); \ +- else if (arm_pcs_default == ARM_PCS_AAPCS) \ +- builtin_define ("__ARM_PCS"); \ +- builtin_define ("__ARM_EABI__"); \ +- } \ +- if (TARGET_IDIV) \ +- { \ +- builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ +- builtin_define ("__ARM_FEATURE_IDIV"); \ +- } \ +- if (inline_asm_unified) \ +- builtin_define ("__ARM_ASM_SYNTAX_UNIFIED__");\ +- } while (0) ++#define TARGET_CPU_CPP_BUILTINS() arm_cpu_cpp_builtins (pfile) + + #include "config/arm/arm-opts.h" + +@@ -252,6 +127,11 @@ extern void (*arm_lang_output_object_attributes_hook)(void); + #define SUBTARGET_CPP_SPEC "" + #endif + ++/* Tree Target Specification. */ ++#define TARGET_ARM_P(flags) (!TARGET_THUMB_P (flags)) ++#define TARGET_THUMB1_P(flags) (TARGET_THUMB_P (flags) && !arm_arch_thumb2) ++#define TARGET_THUMB2_P(flags) (TARGET_THUMB_P (flags) && arm_arch_thumb2) ++ + /* Run-time Target Specification. */ + #define TARGET_SOFT_FLOAT (arm_float_abi == ARM_FLOAT_ABI_SOFT) + /* Use hardware floating point instructions. */ +@@ -367,21 +247,21 @@ extern void (*arm_lang_output_object_attributes_hook)(void); + #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) + + /* Nonzero if this chip supports ldrex and strex */ +-#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) ++#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) + + /* Nonzero if this chip supports ldrex{bh} and strex{bh}. 
*/ +-#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) ++#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) + + /* Nonzero if this chip supports ldrexd and strexd. */ +-#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ +- && arm_arch_notm) ++#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) \ ++ || arm_arch7) && arm_arch_notm) + + /* Nonzero if this chip supports load-acquire and store-release. */ + #define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8) + + /* Nonzero if integer division instructions supported. */ +-#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ +- || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) ++#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ ++ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) + + /* Nonzero if disallow volatile memory access in IT block. */ + #define TARGET_NO_VOLATILE_CE (arm_arch_no_volatile_ce) +@@ -389,6 +269,12 @@ extern void (*arm_lang_output_object_attributes_hook)(void); + /* Should NEON be used for 64-bits bitops. */ + #define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits) + ++/* Should constant I be slplit for OP. */ ++#define DONT_EARLY_SPLIT_CONSTANT(i, op) \ ++ ((optimize >= 2) \ ++ && can_create_pseudo_p () \ ++ && !const_ok_for_op (i, op)) ++ + /* True iff the full BPABI is being used. If TARGET_BPABI is true, + then TARGET_AAPCS_BASED must be true -- but the converse does not + hold. TARGET_BPABI implies the use of the BPABI runtime library, +@@ -473,7 +359,7 @@ enum base_architecture + BASE_ARCH_5TEJ = 5, + BASE_ARCH_6 = 6, + BASE_ARCH_6J = 6, +- BASE_ARCH_6ZK = 6, ++ BASE_ARCH_6KZ = 6, + BASE_ARCH_6K = 6, + BASE_ARCH_6T2 = 6, + BASE_ARCH_6M = 6, +@@ -528,12 +414,6 @@ extern int arm_arch8; + /* Nonzero if this chip can benefit from load scheduling. */ + extern int arm_ld_sched; + +-/* Nonzero if generating Thumb code, either Thumb-1 or Thumb-2. */ +-extern int thumb_code; +- +-/* Nonzero if generating Thumb-1 code. */ +-extern int thumb1_code; +- + /* Nonzero if this chip is a StrongARM. */ + extern int arm_tune_strongarm; + +@@ -1360,46 +1240,6 @@ enum reg_class + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X))) + +-/* Try a machine-dependent way of reloading an illegitimate address +- operand. If we find one, push the reload and jump to WIN. This +- macro is used in only one place: `find_reloads_address' in reload.c. +- +- For the ARM, we wish to handle large displacements off a base +- register by splitting the addend across a MOV and the mem insn. +- This can cut the number of reloads needed. */ +-#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \ +- do \ +- { \ +- if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \ +- goto WIN; \ +- } \ +- while (0) +- +-/* XXX If an HImode FP+large_offset address is converted to an HImode +- SP+large_offset address, then reload won't know how to fix it. It sees +- only that SP isn't valid for HImode, and so reloads the SP into an index +- register, but the resulting address is still invalid because the offset +- is too big. We fix it here instead by reloading the entire address. */ +-/* We could probably achieve better results by defining PROMOTE_MODE to help +- cope with the variances between the Thumb's signed and unsigned byte and +- halfword load instructions. */ +-/* ??? This should be safe for thumb2, but we may be able to do better. 
*/ +-#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ +-do { \ +- rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \ +- if (new_x) \ +- { \ +- X = new_x; \ +- goto WIN; \ +- } \ +-} while (0) +- +-#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ +- if (TARGET_ARM) \ +- ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \ +- else \ +- THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) +- + /* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + ARM regs are UNITS_PER_WORD bits. +@@ -2096,10 +1936,11 @@ enum arm_auto_incmodes + (current_tune->branch_cost (speed_p, predictable_p)) + + /* False if short circuit operation is preferred. */ +-#define LOGICAL_OP_NON_SHORT_CIRCUIT \ +- ((optimize_size) \ +- ? (TARGET_THUMB ? false : true) \ +- : (current_tune->logical_op_non_short_circuit[TARGET_ARM])) ++#define LOGICAL_OP_NON_SHORT_CIRCUIT \ ++ ((optimize_size) \ ++ ? (TARGET_THUMB ? false : true) \ ++ : TARGET_THUMB ? static_cast (current_tune->logical_op_non_short_circuit_thumb) \ ++ : static_cast (current_tune->logical_op_non_short_circuit_arm)) + + + /* Position Independent Code. */ +@@ -2135,7 +1976,8 @@ extern int making_const_table; + c_register_pragma (0, "long_calls", arm_pr_long_calls); \ + c_register_pragma (0, "no_long_calls", arm_pr_no_long_calls); \ + c_register_pragma (0, "long_calls_off", arm_pr_long_calls_off); \ +- arm_lang_object_attributes_init(); \ ++ arm_lang_object_attributes_init(); \ ++ arm_register_target_pragmas(); \ + } while (0) + + /* Condition code information. */ +@@ -2222,23 +2064,7 @@ extern int making_const_table; + ? 1 : 0) + + #define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ +- do \ +- { \ +- if (TARGET_THUMB) \ +- { \ +- if (is_called_in_ARM_mode (DECL) \ +- || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \ +- && cfun->is_thunk)) \ +- fprintf (STREAM, "\t.code 32\n") ; \ +- else if (TARGET_THUMB1) \ +- fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \ +- else \ +- fprintf (STREAM, "\t.thumb\n\t.thumb_func\n") ; \ +- } \ +- if (TARGET_POKE_FUNCTION_NAME) \ +- arm_poke_function_name (STREAM, (const char *) NAME); \ +- } \ +- while (0) ++ arm_declare_function_name ((STREAM), (NAME), (DECL)); + + /* For aliases of functions we use .thumb_set instead. */ + #define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL1, DECL2) \ +@@ -2413,4 +2239,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); + + #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS + #define TARGET_SUPPORTS_WIDE_INT 1 ++ ++/* For switching between functions with different target attributes. */ ++#define SWITCHABLE_TARGET 1 ++ + #endif /* ! GCC_ARM_H */ +--- a/src/gcc/config/arm/arm.md ++++ b/src/gcc/config/arm/arm.md +@@ -69,13 +69,17 @@ + ; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when + ; generating ARM code. This is used to control the length of some insn + ; patterns that share the same RTL in both ARM and Thumb code. +-(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code"))) ++(define_attr "is_thumb" "yes,no" ++ (const (if_then_else (symbol_ref "TARGET_THUMB") ++ (const_string "yes") (const_string "no")))) + + ; IS_ARCH6 is set to 'yes' when we are generating code form ARMv6. + (define_attr "is_arch6" "no,yes" (const (symbol_ref "arm_arch6"))) + + ; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. 
+-(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) ++(define_attr "is_thumb1" "yes,no" ++ (const (if_then_else (symbol_ref "TARGET_THUMB1") ++ (const_string "yes") (const_string "no")))) + + ; We use this attribute to disable alternatives that can produce 32-bit + ; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks +@@ -1164,10 +1168,16 @@ + { + if (TARGET_32BIT) + { +- arm_split_constant (MINUS, SImode, NULL_RTX, +- INTVAL (operands[1]), operands[0], +- operands[2], optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[1]), MINUS)) ++ operands[1] = force_reg (SImode, operands[1]); ++ else ++ { ++ arm_split_constant (MINUS, SImode, NULL_RTX, ++ INTVAL (operands[1]), operands[0], ++ operands[2], ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + else /* TARGET_THUMB1 */ + operands[1] = force_reg (SImode, operands[1]); +@@ -1177,9 +1187,9 @@ + + ; ??? Check Thumb-2 split length + (define_insn_and_split "*arm_subsi3_insn" +- [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r") +- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n") +- (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))] ++ [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r,r,r,rk,r") ++ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,I,r,r,k ,?n") ++ (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r,I,r,r ,r")))] + "TARGET_32BIT" + "@ + sub%?\\t%0, %1, %2 +@@ -2078,14 +2088,19 @@ + operands[1] = convert_to_mode (QImode, operands[1], 1); + emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0], + operands[1])); ++ DONE; + } ++ else if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[2]), AND)) ++ operands[2] = force_reg (SImode, operands[2]); + else +- arm_split_constant (AND, SImode, NULL_RTX, +- INTVAL (operands[2]), operands[0], +- operands[1], +- optimize && can_create_pseudo_p ()); ++ { ++ arm_split_constant (AND, SImode, NULL_RTX, ++ INTVAL (operands[2]), operands[0], ++ operands[1], ++ optimize && can_create_pseudo_p ()); + +- DONE; ++ DONE; ++ } + } + } + else /* TARGET_THUMB1 */ +@@ -2768,6 +2783,55 @@ + (const_string "logic_shift_reg")))] + ) + ++;; Shifted bics pattern used to set up CC status register and not reusing ++;; bics output. Pattern restricts Thumb2 shift operand as bics for Thumb2 ++;; does not support shift by register. ++(define_insn "andsi_not_shiftsi_si_scc_no_reuse" ++ [(set (reg:CC_NOOV CC_REGNUM) ++ (compare:CC_NOOV ++ (and:SI (not:SI (match_operator:SI 0 "shift_operator" ++ [(match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "arm_rhs_operand" "rM")])) ++ (match_operand:SI 3 "s_register_operand" "r")) ++ (const_int 0))) ++ (clobber (match_scratch:SI 4 "=r"))] ++ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" ++ "bic%.%?\\t%4, %3, %1%S0" ++ [(set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "conds" "set") ++ (set_attr "shift" "1") ++ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "logic_shift_imm") ++ (const_string "logic_shift_reg")))] ++) ++ ++;; Same as andsi_not_shiftsi_si_scc_no_reuse, but the bics result is also ++;; getting reused later. 
++(define_insn "andsi_not_shiftsi_si_scc" ++ [(parallel [(set (reg:CC_NOOV CC_REGNUM) ++ (compare:CC_NOOV ++ (and:SI (not:SI (match_operator:SI 0 "shift_operator" ++ [(match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "arm_rhs_operand" "rM")])) ++ (match_operand:SI 3 "s_register_operand" "r")) ++ (const_int 0))) ++ (set (match_operand:SI 4 "s_register_operand" "=r") ++ (and:SI (not:SI (match_op_dup 0 ++ [(match_dup 1) ++ (match_dup 2)])) ++ (match_dup 3)))])] ++ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" ++ "bic%.%?\\t%4, %3, %1%S0" ++ [(set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "conds" "set") ++ (set_attr "shift" "1") ++ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "logic_shift_imm") ++ (const_string "logic_shift_reg")))] ++) ++ + (define_insn "*andsi_notsi_si_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV +@@ -2884,10 +2948,16 @@ + { + if (TARGET_32BIT) + { +- arm_split_constant (IOR, SImode, NULL_RTX, +- INTVAL (operands[2]), operands[0], operands[1], +- optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[2]), IOR)) ++ operands[2] = force_reg (SImode, operands[2]); ++ else ++ { ++ arm_split_constant (IOR, SImode, NULL_RTX, ++ INTVAL (operands[2]), operands[0], ++ operands[1], ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + else /* TARGET_THUMB1 */ + { +@@ -3054,10 +3124,16 @@ + { + if (TARGET_32BIT) + { +- arm_split_constant (XOR, SImode, NULL_RTX, +- INTVAL (operands[2]), operands[0], operands[1], +- optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[2]), XOR)) ++ operands[2] = force_reg (SImode, operands[2]); ++ else ++ { ++ arm_split_constant (XOR, SImode, NULL_RTX, ++ INTVAL (operands[2]), operands[0], ++ operands[1], ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + else /* TARGET_THUMB1 */ + { +@@ -5076,7 +5152,7 @@ + + (define_split + [(set (match_operand:SI 0 "s_register_operand" "") +- (ior_xor:SI (and:SI (ashift:SI ++ (IOR_XOR:SI (and:SI (ashift:SI + (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")) +@@ -5088,7 +5164,7 @@ + == (GET_MODE_MASK (GET_MODE (operands[5])) + & (GET_MODE_MASK (GET_MODE (operands[5])) + << (INTVAL (operands[2])))))" +- [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) ++ [(set (match_dup 0) (IOR_XOR:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (zero_extend:SI (match_dup 5)))] + "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" +@@ -5590,10 +5666,18 @@ + && !(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1])))) + { +- arm_split_constant (SET, SImode, NULL_RTX, +- INTVAL (operands[1]), operands[0], NULL_RTX, +- optimize && can_create_pseudo_p ()); +- DONE; ++ if (DONT_EARLY_SPLIT_CONSTANT (INTVAL (operands[1]), SET)) ++ { ++ emit_insn (gen_rtx_SET (SImode, operands[0], operands[1])); ++ DONE; ++ } ++ else ++ { ++ arm_split_constant (SET, SImode, NULL_RTX, ++ INTVAL (operands[1]), operands[0], NULL_RTX, ++ optimize && can_create_pseudo_p ()); ++ DONE; ++ } + } + } + else /* TARGET_THUMB1... 
*/ +@@ -5667,7 +5751,7 @@ + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") +- (set_attr "type" "mov_imm")] ++ (set_attr "type" "alu_sreg")] + ) + + (define_insn "*arm_movsi_insn" +@@ -6713,7 +6797,7 @@ + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) +- || INTVAL (operands[2]) > 14 ++ || INTVAL (operands[2]) > MAX_LDM_STM_OPS + || INTVAL (operands[2]) < 2 + || !MEM_P (operands[1]) + || !REG_P (operands[0]) +@@ -6738,7 +6822,7 @@ + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) +- || INTVAL (operands[2]) > 14 ++ || INTVAL (operands[2]) > MAX_LDM_STM_OPS + || INTVAL (operands[2]) < 2 + || !REG_P (operands[1]) + || !MEM_P (operands[0]) +@@ -6923,7 +7007,7 @@ + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") +- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) ++ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + + (define_insn "*cmpsi_shiftsi_swp" + [(set (reg:CC_SWP CC_REGNUM) +@@ -6936,7 +7020,7 @@ + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") +- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) ++ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + + (define_insn "*arm_cmpsi_negshiftsi_si" + [(set (reg:CC_Z CC_REGNUM) +@@ -7529,10 +7613,10 @@ + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") +- (const_string "mov_reg") +- (const_string "mov_reg") +- (const_string "mov_reg") +- (const_string "mov_reg")])] ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*movsfcc_soft_insn" +@@ -7755,6 +7839,13 @@ + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "* + { ++ rtx op = operands[0]; ++ ++ /* Switch mode now when possible. */ ++ if (SYMBOL_REF_DECL (op) && !TREE_PUBLIC (SYMBOL_REF_DECL (op)) ++ && arm_arch5 && arm_change_mode_p (SYMBOL_REF_DECL (op))) ++ return NEED_PLT_RELOC ? \"blx%?\\t%a0(PLT)\" : \"blx%?\\t(%a0)\"; ++ + return NEED_PLT_RELOC ? \"bl%?\\t%a0(PLT)\" : \"bl%?\\t%a0\"; + }" + [(set_attr "type" "call")] +@@ -7772,6 +7863,13 @@ + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "* + { ++ rtx op = operands[1]; + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++ /* Switch mode now when possible. */ ++ if (SYMBOL_REF_DECL (op) && !TREE_PUBLIC (SYMBOL_REF_DECL (op)) ++ && arm_arch5 && arm_change_mode_p (SYMBOL_REF_DECL (op))) ++ return NEED_PLT_RELOC ? \"blx%?\\t%a1(PLT)\" : \"blx%?\\t(%a1)\"; ++ + return NEED_PLT_RELOC ? 
\"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\"; + }" + [(set_attr "type" "call")] +@@ -7885,7 +7983,7 @@ + ) + + (define_expand "return" +- [(returns)] ++ [(RETURNS)] + "(TARGET_ARM || (TARGET_THUMB2 + && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL + && !IS_STACKALIGN (arm_current_func_type ()))) +@@ -7923,7 +8021,7 @@ + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) +- (returns) ++ (RETURNS) + (pc)))] + "TARGET_ARM " + "* +@@ -7946,7 +8044,7 @@ + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (pc) +- (returns)))] ++ (RETURNS)))] + "TARGET_ARM " + "* + { +@@ -8280,7 +8378,7 @@ + + (define_insn "*_multsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") +- (shiftable_ops:SI ++ (SHIFTABLE_OPS:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "power_of_two_operand" "")) + (match_operand:SI 1 "s_register_operand" "rk,")))] +@@ -8294,7 +8392,7 @@ + + (define_insn "*_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") +- (shiftable_ops:SI ++ (SHIFTABLE_OPS:SI + (match_operator:SI 2 "shift_nomul_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) +@@ -8690,7 +8788,14 @@ + return \"\"; + " + [(set_attr "conds" "use") +- (set_attr "type" "mov_reg,mov_reg,multiple") ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (if_then_else (match_operand 1 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (const_string "multiple")]) + (set_attr "length" "4,4,8")] + ) + +@@ -9486,8 +9591,8 @@ + (const_string "alu_imm" ) + (const_string "alu_sreg")) + (const_string "alu_imm") +- (const_string "alu_sreg") +- (const_string "alu_sreg")])] ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_plus" +@@ -9524,7 +9629,13 @@ + sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") +- (set_attr "type" "alu_sreg,alu_imm,multiple,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "alu_imm" ) ++ (const_string "alu_sreg")) ++ (const_string "alu_imm") ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_arith_arith" +@@ -9619,7 +9730,11 @@ + %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") +- (set_attr "type" "alu_shift_reg,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "alu_shift_imm" ) ++ (const_string "alu_shift_reg")) ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_arith" +@@ -9680,7 +9795,11 @@ + %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") +- (set_attr "type" "alu_shift_reg,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "alu_shift_imm" ) ++ (const_string "alu_shift_reg")) ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_not" +@@ -9787,7 +9906,12 @@ + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") +- (set_attr "type" "mov_shift_reg,multiple,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else 
(match_operand 3 "const_int_operand" "") ++ (const_string "mov_shift" ) ++ (const_string "mov_shift_reg")) ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_move_shift" +@@ -9825,7 +9949,12 @@ + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") +- (set_attr "type" "mov_shift_reg,multiple,multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 3 "const_int_operand" "") ++ (const_string "mov_shift" ) ++ (const_string "mov_shift_reg")) ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*ifcompare_shift_shift" +@@ -10906,7 +11035,7 @@ + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") +- (set_attr "type" "mov_imm")] ++ (set_attr "type" "alu_sreg")] + ) + + (define_insn "*arm_rev" +--- a/src/gcc/config/arm/arm.opt ++++ b/src/gcc/config/arm/arm.opt +@@ -122,6 +122,10 @@ Enum(float_abi_type) String(softfp) Value(ARM_FLOAT_ABI_SOFTFP) + EnumValue + Enum(float_abi_type) String(hard) Value(ARM_FLOAT_ABI_HARD) + ++mflip-thumb ++Target Report Var(TARGET_FLIP_THUMB) Undocumented ++Switch ARM/Thumb modes on alternating functions for compiler testing ++ + mfp16-format= + Target RejectNegative Joined Enum(arm_fp16_format_type) Var(arm_fp16_format) Init(ARM_FP16_FORMAT_NONE) + Specify the __fp16 floating-point format +@@ -182,7 +186,7 @@ Target RejectNegative Joined UInteger Var(arm_structure_size_boundary) Init(DEFA + Specify the minimum bit alignment of structures + + mthumb +-Target Report RejectNegative Mask(THUMB) ++Target Report RejectNegative Mask(THUMB) Save + Generate code for Thumb state + + mthumb-interwork +@@ -246,7 +250,7 @@ Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) + Only generate absolute relocations on word sized values. + + mrestrict-it +-Target Report Var(arm_restrict_it) Init(2) ++Target Report Var(arm_restrict_it) Init(2) Save + Generate IT blocks appropriate for ARMv8. + + mold-rtx-costs +@@ -275,5 +279,5 @@ Target Report Var(target_slow_flash_data) Init(0) + Assume loading data from flash is slower than fetching instructions. + + masm-syntax-unified +-Target Report Var(inline_asm_unified) Init(0) ++Target Report Var(inline_asm_unified) Init(0) Save + Assume unified syntax for Thumb inline assembly code. 
+--- a/src/gcc/config/arm/arm_neon_builtins.def ++++ b/src/gcc/config/arm/arm_neon_builtins.def +@@ -67,28 +67,28 @@ VAR8 (BINOP, vqshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (BINOP, vqshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (BINOP, vqrshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (BINOP, vqrshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR3 (GETLANE, vshrn_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vrshrn_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqshrns_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqshrnu_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqrshrns_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqrshrnu_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqshrun_n, v8hi, v4si, v2di) +-VAR3 (GETLANE, vqrshrun_n, v8hi, v4si, v2di) +-VAR8 (GETLANE, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (GETLANE, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR3 (GETLANE, vshlls_n, v8qi, v4hi, v2si) +-VAR3 (GETLANE, vshllu_n, v8qi, v4hi, v2si) +-VAR8 (SETLANE, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vshrn_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vrshrn_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqshrns_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqshrnu_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqrshrns_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqrshrnu_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqshrun_n, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vqrshrun_n, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (BINOP_IMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR3 (BINOP_IMM, vshlls_n, v8qi, v4hi, v2si) ++VAR3 (BINOP_IMM, vshllu_n, v8qi, v4hi, v2si) ++VAR8 (TERNOP_IMM, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR2 (BINOP, vsub, v2sf, v4sf) + VAR3 (BINOP, vsubls, v8qi, v4hi, v2si) + VAR3 (BINOP, vsublu, v8qi, v4hi, v2si) +@@ -140,8 +140,8 @@ VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si) + VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) + VAR2 (BINOP, vrecps, v2sf, v4sf) + VAR2 (BINOP, vrsqrts, v2sf, v4sf) +-VAR8 (SETLANE, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) +-VAR8 (SETLANE, vsli_n, 
v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) ++VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) + VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) + VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) + VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) +@@ -162,7 +162,7 @@ VAR10 (SETLANE, vset_lane, + VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di) + VAR10 (UNOP, vdup_n, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +-VAR10 (BINOP, vdup_lane, ++VAR10 (GETLANE, vdup_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) + VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) + VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di) +@@ -174,23 +174,23 @@ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) + VAR3 (UNOP, vmovls, v8qi, v4hi, v2si) + VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si) + VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlals_lane, v4hi, v2si) +-VAR2 (LANEMAC, vmlalu_lane, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) +-VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlsls_lane, v4hi, v2si) +-VAR2 (LANEMAC, vmlslu_lane, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) ++VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vqdmlal_lane, v4hi, v2si) ++VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si) ++VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si) + VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR6 (LANEMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlals_n, v4hi, v2si) +-VAR2 (LANEMAC, vmlalu_n, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlal_n, v4hi, v2si) +-VAR6 (LANEMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +-VAR2 (LANEMAC, vmlsls_n, v4hi, v2si) +-VAR2 (LANEMAC, vmlslu_n, v4hi, v2si) +-VAR2 (LANEMAC, vqdmlsl_n, v4hi, v2si) ++VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_N, vmlals_n, v4hi, v2si) ++VAR2 (MAC_N, vmlalu_n, v4hi, v2si) ++VAR2 (MAC_N, vqdmlal_n, v4hi, v2si) ++VAR6 (MAC_N, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ++VAR2 (MAC_N, vmlsls_n, v4hi, v2si) ++VAR2 (MAC_N, vmlslu_n, v4hi, v2si) ++VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si) + VAR10 (SETLANE, vext, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) + VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) +--- a/src/gcc/config/arm/cortex-a53.md ++++ b/src/gcc/config/arm/cortex-a53.md +@@ -360,7 +360,7 @@ + ;; Crude Advanced SIMD approximation. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +-(define_insn_reservation "cortex_53_advsimd" 4 ++(define_insn_reservation "cortex_a53_advsimd" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "is_neon_type" "yes")) + "cortex_a53_simd0") +--- a/src/gcc/config/arm/driver-arm.c ++++ b/src/gcc/config/arm/driver-arm.c +@@ -35,6 +35,9 @@ static struct vendor_cpu arm_cpu_table[] = { + {"0xb02", "armv6k", "mpcore"}, + {"0xb36", "armv6j", "arm1136j-s"}, + {"0xb56", "armv6t2", "arm1156t2-s"}, ++ /* armv6kz is the correct spelling for ARMv6KZ but may not be supported in ++ the version of binutils used. 
The incorrect spelling is supported in ++ legacy and current binutils so that is used instead. */ + {"0xb76", "armv6zk", "arm1176jz-s"}, + {"0xc05", "armv7-a", "cortex-a5"}, + {"0xc07", "armv7ve", "cortex-a7"}, +--- a/src/gcc/config/arm/elf.h ++++ b/src/gcc/config/arm/elf.h +@@ -120,7 +120,6 @@ + { "marm", "mlittle-endian", "mfloat-abi=soft", "mno-thumb-interwork", "fno-leading-underscore" } + #endif + +-#define TARGET_ASM_FILE_START_APP_OFF true + #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + + +--- a/src/gcc/config/arm/iterators.md ++++ b/src/gcc/config/arm/iterators.md +@@ -181,39 +181,53 @@ + ;; compare a second time. + (define_code_iterator LTUGEU [ltu geu]) + ++;; The signed gt, ge comparisons ++(define_code_iterator GTGE [gt ge]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++;; The unsigned gt, ge comparisons ++(define_code_iterator GTUGEU [gtu geu]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++;; Comparisons for vc ++(define_code_iterator COMPARISONS [eq gt ge le lt]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + ;; A list of ... +-(define_code_iterator ior_xor [ior xor]) ++(define_code_iterator IOR_XOR [ior xor]) + + ;; Operations on two halves of a quadword vector. +-(define_code_iterator vqh_ops [plus smin smax umin umax]) ++(define_code_iterator VQH_OPS [plus smin smax umin umax]) + + ;; Operations on two halves of a quadword vector, + ;; without unsigned variants (for use with *SFmode pattern). +-(define_code_iterator vqhs_ops [plus smin smax]) ++(define_code_iterator VQHS_OPS [plus smin smax]) + + ;; A list of widening operators + (define_code_iterator SE [sign_extend zero_extend]) + + ;; Right shifts +-(define_code_iterator rshifts [ashiftrt lshiftrt]) ++(define_code_iterator RSHIFTS [ashiftrt lshiftrt]) + + ;; Iterator for integer conversions + (define_code_iterator FIXUORS [fix unsigned_fix]) + + ;; Binary operators whose second operand can be shifted. +-(define_code_iterator shiftable_ops [plus minus ior xor and]) ++(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and]) + +-;; plus and minus are the only shiftable_ops for which Thumb2 allows ++;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows + ;; a stack pointer opoerand. The minus operation is a candidate for an rsub + ;; and hence only plus is supported. + (define_code_attr t2_binop0 + [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) + +-;; The instruction to use when a shiftable_ops has a shift operation as ++;; The instruction to use when a SHIFTABLE_OPS has a shift operation as + ;; its first operand. 
+ (define_code_attr arith_shift_insn + [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) + ++(define_code_attr cmp_op [(eq "eq") (gt "gt") (ge "ge") (lt "lt") (le "le") ++ (gtu "gt") (geu "ge")]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++(define_code_attr cmp_type [(eq "i") (gt "s") (ge "s") (lt "s") (le "s")]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + ;;---------------------------------------------------------------------------- + ;; Int iterators + ;;---------------------------------------------------------------------------- +@@ -221,6 +235,10 @@ + (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM + UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) + ++(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} ++(define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT]) + -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} + (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) + + (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM +@@ -677,6 +695,11 @@ + + ]) + ++(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt") ++ (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") ++ (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") ++ (UNSPEC_VCAGT "gt")]) + - /* vstn */ + (define_int_attr r [ + (UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r") + (UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "") +@@ -774,7 +797,7 @@ + (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) + + ;; Both kinds of return insn. 
+-(define_code_iterator returns [return simple_return]) ++(define_code_iterator RETURNS [return simple_return]) + (define_code_attr return_str [(return "") (simple_return "simple_")]) + (define_code_attr return_simple_p [(return "false") (simple_return "true")]) + (define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") +--- a/src/gcc/config/arm/iwmmxt.md ++++ b/src/gcc/config/arm/iwmmxt.md +@@ -107,8 +107,8 @@ + ) + + (define_insn "*iwmmxt_arm_movdi" +- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv") +- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))] ++ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv") ++ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" +--- a/src/gcc/config/arm/linux-eabi.h ++++ b/src/gcc/config/arm/linux-eabi.h +@@ -77,6 +77,23 @@ + %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ + %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + ++/* For ARM musl currently supports four dynamic linkers: ++ - ld-musl-arm.so.1 - for the EABI-derived soft-float ABI ++ - ld-musl-armhf.so.1 - for the EABI-derived hard-float ABI ++ - ld-musl-armeb.so.1 - for the EABI-derived soft-float ABI, EB ++ - ld-musl-armebhf.so.1 - for the EABI-derived hard-float ABI, EB ++ musl does not support the legacy OABI mode. ++ All the dynamic linkers live in /lib. ++ We default to soft-float, EL. */ ++#undef MUSL_DYNAMIC_LINKER ++#if TARGET_BIG_ENDIAN_DEFAULT ++#define MUSL_DYNAMIC_LINKER_E "%{mlittle-endian:;:eb}" ++#else ++#define MUSL_DYNAMIC_LINKER_E "%{mbig-endian:eb}" ++#endif ++#define MUSL_DYNAMIC_LINKER \ ++ "/lib/ld-musl-arm" MUSL_DYNAMIC_LINKER_E "%{mfloat-abi=hard:hf}.so.1" ++ + /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. */ + #undef LINK_SPEC +@@ -107,6 +124,7 @@ + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + + /* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we +--- a/src/gcc/config/arm/neon.md ++++ b/src/gcc/config/arm/neon.md +@@ -1114,7 +1114,7 @@ + ;; lshrdi3_neon + (define_insn_and_split "di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") +- (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") ++ (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) +@@ -1194,71 +1194,6 @@ + [(set_attr "type" "neon_add_widen")] + ) + +-;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit +-;; shift-count granularity. That's good enough for the middle-end's current +-;; needs. +- +-;; Note that it's not safe to perform such an operation in big-endian mode, +-;; due to element-ordering issues. 
+- +-(define_expand "vec_shr_" +- [(match_operand:VDQ 0 "s_register_operand" "") +- (match_operand:VDQ 1 "s_register_operand" "") +- (match_operand:SI 2 "const_multiple_of_8_operand" "")] +- "TARGET_NEON && !BYTES_BIG_ENDIAN" +-{ +- rtx zero_reg; +- HOST_WIDE_INT num_bits = INTVAL (operands[2]); +- const int width = GET_MODE_BITSIZE (mode); +- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; +- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = +- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; +- +- if (num_bits == width) +- { +- emit_move_insn (operands[0], operands[1]); +- DONE; +- } +- +- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); +- operands[0] = gen_lowpart (bvecmode, operands[0]); +- operands[1] = gen_lowpart (bvecmode, operands[1]); +- +- emit_insn (gen_ext (operands[0], operands[1], zero_reg, +- GEN_INT (num_bits / BITS_PER_UNIT))); +- DONE; +-}) +- +-(define_expand "vec_shl_" +- [(match_operand:VDQ 0 "s_register_operand" "") +- (match_operand:VDQ 1 "s_register_operand" "") +- (match_operand:SI 2 "const_multiple_of_8_operand" "")] +- "TARGET_NEON && !BYTES_BIG_ENDIAN" +-{ +- rtx zero_reg; +- HOST_WIDE_INT num_bits = INTVAL (operands[2]); +- const int width = GET_MODE_BITSIZE (mode); +- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; +- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = +- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; +- +- if (num_bits == 0) +- { +- emit_move_insn (operands[0], CONST0_RTX (mode)); +- DONE; +- } +- +- num_bits = width - num_bits; +- +- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); +- operands[0] = gen_lowpart (bvecmode, operands[0]); +- operands[1] = gen_lowpart (bvecmode, operands[1]); +- +- emit_insn (gen_ext (operands[0], zero_reg, operands[1], +- GEN_INT (num_bits / BITS_PER_UNIT))); +- DONE; +-}) +- + ;; Helpers for quad-word reduction operations + + ; Add (or smin, smax...) the low N/2 elements of the N-element vector +@@ -1267,7 +1202,7 @@ + + (define_insn "quad_halves_v4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vqh_ops:V2SI ++ (VQH_OPS:V2SI + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SI (match_dup 1) +@@ -1280,7 +1215,7 @@ + + (define_insn "quad_halves_v4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vqhs_ops:V2SF ++ (VQHS_OPS:V2SF + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SF (match_dup 1) +@@ -1293,7 +1228,7 @@ - __extension__ static __inline void -@@ -23887,7 +23769,7 @@ vtst_s32 (int32x2_t __a, int32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vtst_s64 (int64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll}; -+ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); - } + (define_insn "quad_halves_v8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "+w") +- (vqh_ops:V4HI ++ (VQH_OPS:V4HI + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) +@@ -1308,7 +1243,7 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -@@ -23911,7 +23793,7 @@ vtst_u32 (uint32x2_t __a, uint32x2_t __b) - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vtst_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) {(__a[0] & __b[0]) ? 
-1ll : 0ll}; -+ return ((__a & __b) != __AARCH64_UINT64_C (0)); - } + (define_insn "quad_halves_v16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "+w") +- (vqh_ops:V8QI ++ (VQH_OPS:V8QI + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) +@@ -2200,134 +2135,140 @@ + [(set_attr "type" "neon_sub_halve_narrow_q")] + ) - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ---- a/src//dev/null -+++ b/src/gcc/config/aarch64/driver-aarch64.c -@@ -0,0 +1,307 @@ -+/* Native CPU detection for aarch64. -+ Copyright (C) 2015 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#include "config.h" -+#include "system.h" -+ -+struct arch_extension -+{ -+ const char *ext; -+ const char *feat_string; -+}; -+ -+#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \ -+ { EXT_NAME, FEATURE_STRING }, -+static struct arch_extension ext_to_feat_string[] = -+{ -+#include "aarch64-option-extensions.def" -+}; -+#undef AARCH64_OPT_EXTENSION -+ -+ -+struct aarch64_core_data -+{ -+ const char* name; -+ const char* arch; -+ const char* implementer_id; -+ const char* part_no; -+}; -+ -+#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ -+ { CORE_NAME, #ARCH, IMP, PART }, -+ -+static struct aarch64_core_data cpu_data [] = -+{ -+#include "aarch64-cores.def" -+ { NULL, NULL, NULL, NULL } -+}; -+ -+#undef AARCH64_CORE -+ -+struct aarch64_arch -+{ -+ const char* id; -+ const char* name; -+}; -+ -+#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ -+ { #ARCH, NAME }, -+ -+static struct aarch64_arch aarch64_arches [] = -+{ -+#include "aarch64-arches.def" -+ {NULL, NULL} -+}; -+ -+#undef AARCH64_ARCH -+ -+/* Return the full architecture name string corresponding to the -+ identifier ID. */ -+ -+static const char* -+get_arch_name_from_id (const char* id) -+{ -+ unsigned int i = 0; -+ -+ for (i = 0; aarch64_arches[i].id != NULL; i++) -+ { -+ if (strcmp (id, aarch64_arches[i].id) == 0) -+ return aarch64_arches[i].name; -+ } -+ -+ return NULL; -+} -+ -+ -+/* Check wether the string CORE contains the same CPU part numbers -+ as BL_STRING. For example CORE="{0xd03, 0xd07}" and BL_STRING="0xd07.0xd03" -+ should return true. */ -+ -+static bool -+valid_bL_string_p (const char** core, const char* bL_string) -+{ -+ return strstr (bL_string, core[0]) != NULL -+ && strstr (bL_string, core[1]) != NULL; -+} -+ -+/* Return true iff ARR contains STR in one of its two elements. 
*/ -+ -+static bool -+contains_string_p (const char** arr, const char* str) -+{ -+ bool res = false; -+ -+ if (arr[0] != NULL) -+ { -+ res = strstr (arr[0], str) != NULL; -+ if (res) -+ return res; -+ -+ if (arr[1] != NULL) -+ return strstr (arr[1], str) != NULL; -+ } -+ -+ return false; +-(define_insn "neon_vceq" +- [(set (match_operand: 0 "s_register_operand" "=w,w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w,w") +- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] +- UNSPEC_VCEQ))] ++;; These may expand to an UNSPEC pattern when a floating point mode is used ++;; without unsafe math optimizations. ++(define_expand "neon_vc" ++ [(match_operand: 0 "s_register_operand" "=w,w") ++ (neg: ++ (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w") ++ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))] + "TARGET_NEON" +- "@ +- vceq.\t%0, %1, %2 +- vceq.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ { ++ /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations ++ are enabled. */ ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT ++ && !flag_unsafe_math_optimizations) ++ { ++ /* We don't just emit a gen_neon_vc_insn_unspec because ++ we define gen_neon_vceq_insn_unspec only for float modes ++ whereas this expander iterates over the integer modes as well, ++ but we will never expand to UNSPECs for the integer comparisons. */ ++ switch (mode) ++ { ++ case V2SFmode: ++ emit_insn (gen_neon_vcv2sf_insn_unspec (operands[0], ++ operands[1], ++ operands[2])); ++ break; ++ case V4SFmode: ++ emit_insn (gen_neon_vcv4sf_insn_unspec (operands[0], ++ operands[1], ++ operands[2])); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ emit_insn (gen_neon_vc_insn (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++ } + ) + +-(define_insn "neon_vcge" ++(define_insn "neon_vc_insn" + [(set (match_operand: 0 "s_register_operand" "=w,w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w,w") +- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] +- UNSPEC_VCGE))] +- "TARGET_NEON" +- "@ +- vcge.\t%0, %1, %2 +- vcge.\t%0, %1, #0" ++ (neg: ++ (COMPARISONS: ++ (match_operand:VDQW 1 "s_register_operand" "w,w") ++ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))] ++ "TARGET_NEON && !(GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT ++ && !flag_unsafe_math_optimizations)" ++ { ++ char pattern[100]; ++ sprintf (pattern, "vc.%s%%#\t%%0," ++ " %%1, %s", ++ GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT ++ ? "f" : "", ++ which_alternative == 0 ++ ? 
"%2" : "#0"); ++ output_asm_insn (pattern, operands); ++ return ""; ++ } + [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") ++ (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ (const_string "neon_compare")))] + ) + +-(define_insn "neon_vcgeu" +- [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQIW 1 "s_register_operand" "w") +- (match_operand:VDQIW 2 "s_register_operand" "w")] +- UNSPEC_VCGEU))] +- "TARGET_NEON" +- "vcge.u%#\t%0, %1, %2" +- [(set_attr "type" "neon_compare")] +-) +- +-(define_insn "neon_vcgt" ++(define_insn "neon_vc_insn_unspec" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w,w") +- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] +- UNSPEC_VCGT))] ++ [(match_operand:VCVTF 1 "s_register_operand" "w,w") ++ (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")] ++ NEON_VCMP))] + "TARGET_NEON" +- "@ +- vcgt.\t%0, %1, %2 +- vcgt.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ { ++ char pattern[100]; ++ sprintf (pattern, "vc.f%%#\t%%0," ++ " %%1, %s", ++ which_alternative == 0 ++ ? "%2" : "#0"); ++ output_asm_insn (pattern, operands); ++ return ""; +} ++ [(set_attr "type" "neon_fp_compare_s")] + ) + +-(define_insn "neon_vcgtu" ++(define_insn "neon_vcu" + [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQIW 1 "s_register_operand" "w") +- (match_operand:VDQIW 2 "s_register_operand" "w")] +- UNSPEC_VCGTU))] ++ (neg: ++ (GTUGEU: ++ (match_operand:VDQIW 1 "s_register_operand" "w") ++ (match_operand:VDQIW 2 "s_register_operand" "w"))))] + "TARGET_NEON" +- "vcgt.u%#\t%0, %1, %2" ++ "vc.u%#\t%0, %1, %2" + [(set_attr "type" "neon_compare")] + ) + +-;; VCLE and VCLT only support comparisons with immediate zero (register +-;; variants are VCGE and VCGT with operands reversed). 
+- +-(define_insn "neon_vcle" +- [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w") +- (match_operand:VDQW 2 "zero_operand" "Dz")] +- UNSPEC_VCLE))] ++(define_expand "neon_vca" ++ [(set (match_operand: 0 "s_register_operand") ++ (neg: ++ (GTGE: ++ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand")) ++ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))] + "TARGET_NEON" +- "vcle.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] +-) +- +-(define_insn "neon_vclt" +- [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: +- [(match_operand:VDQW 1 "s_register_operand" "w") +- (match_operand:VDQW 2 "zero_operand" "Dz")] +- UNSPEC_VCLT))] +- "TARGET_NEON" +- "vclt.\t%0, %1, #0" +- [(set (attr "type") +- (if_then_else (match_test "") +- (const_string "neon_fp_compare_s") +- (if_then_else (match_operand 2 "zero_operand") +- (const_string "neon_compare_zero") +- (const_string "neon_compare"))))] ++ { ++ if (flag_unsafe_math_optimizations) ++ emit_insn (gen_neon_vca_insn (operands[0], operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_neon_vca_insn_unspec (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++ } + ) + +-(define_insn "neon_vcage" ++(define_insn "neon_vca_insn" + [(set (match_operand: 0 "s_register_operand" "=w") +- (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") +- (match_operand:VCVTF 2 "s_register_operand" "w")] +- UNSPEC_VCAGE))] +- "TARGET_NEON" +- "vacge.\t%0, %1, %2" ++ (neg: ++ (GTGE: ++ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")) ++ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))] ++ "TARGET_NEON && flag_unsafe_math_optimizations" ++ "vac.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] + ) + +-(define_insn "neon_vcagt" ++(define_insn "neon_vca_insn_unspec" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w")] +- UNSPEC_VCAGT))] ++ NEON_VACMP))] + "TARGET_NEON" +- "vacgt.\t%0, %1, %2" ++ "vac.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] + ) + +@@ -2722,8 +2663,6 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); +- + if (BYTES_BIG_ENDIAN) + { + /* The intrinsics are defined in terms of a model where the +@@ -2753,8 +2692,6 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); +- + if (BYTES_BIG_ENDIAN) + { + /* The intrinsics are defined in terms of a model where the +@@ -2784,7 +2721,6 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; + }) +@@ -2795,18 +2731,11 @@ + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" + { +- switch (INTVAL (operands[2])) +- { +- case 0: +- emit_move_insn (operands[0], gen_lowpart (DImode, operands[1])); +- break; +- case 1: +- emit_move_insn (operands[0], gen_highpart (DImode, operands[1])); +- break; +- default: +- neon_lane_bounds (operands[2], 0, 1); +- FAIL; +- } ++ int lane = INTVAL (operands[2]); ++ gcc_assert ((lane ==0) || (lane == 1)); ++ emit_move_insn (operands[0], lane == 0 ++ ? 
gen_lowpart (DImode, operands[1]) ++ : gen_highpart (DImode, operands[1])); + DONE; + }) + +@@ -2818,7 +2747,6 @@ + "TARGET_NEON" + { + unsigned int elt = INTVAL (operands[3]); +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { +@@ -2841,7 +2769,6 @@ + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; + }) +@@ -2923,7 +2850,6 @@ + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + if (BYTES_BIG_ENDIAN) + { + unsigned int elt = INTVAL (operands[2]); +@@ -2944,7 +2870,6 @@ + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; + }) +@@ -2956,7 +2881,6 @@ + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { +- neon_lane_bounds (operands[2], 0, 1); + emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); + DONE; + }) +@@ -3156,7 +3080,6 @@ + UNSPEC_VMUL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%P0, %P1, %P2[%c3]"; + } + [(set (attr "type") +@@ -3174,7 +3097,6 @@ + UNSPEC_VMUL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%q0, %q1, %P2[%c3]"; + } + [(set (attr "type") +@@ -3192,7 +3114,6 @@ + VMULL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmull.%#\t%q0, %P1, %P2[%c3]"; + } + [(set_attr "type" "neon_mul__scalar_long")] +@@ -3207,7 +3128,6 @@ + UNSPEC_VQDMULL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmull.\t%q0, %P1, %P2[%c3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +@@ -3222,7 +3142,6 @@ + VQDMULH_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmulh.\t%q0, %q1, %P2[%c3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_q")] +@@ -3237,7 +3156,6 @@ + VQDMULH_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmulh.\t%P0, %P1, %P2[%c3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_q")] +@@ -3253,7 +3171,6 @@ + UNSPEC_VMLA_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%P0, %P2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3272,7 +3189,6 @@ + UNSPEC_VMLA_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%q0, %q2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3291,7 +3207,6 @@ + VMLAL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlal.%#\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +@@ -3307,7 +3222,6 @@ + UNSPEC_VQDMLAL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlal.\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +@@ -3323,7 +3237,6 @@ + UNSPEC_VMLS_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%P0, %P2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3342,7 +3255,6 @@ + UNSPEC_VMLS_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%q0, %q2, %P3[%c4]"; + } + [(set (attr "type") +@@ -3361,7 +3273,6 @@ + 
VMLSL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlsl.%#\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +@@ -3377,7 +3288,6 @@ + UNSPEC_VQDMLSL_LANE))] + "TARGET_NEON" + { +- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlsl.\t%q0, %P2, %P3[%c4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +--- a/src/gcc/config/arm/sync.md ++++ b/src/gcc/config/arm/sync.md +@@ -50,14 +50,11 @@ + { + if (TARGET_HAVE_DMB) + { +- /* Note we issue a system level barrier. We should consider issuing +- a inner shareabilty zone barrier here instead, ie. "DMB ISH". */ +- /* ??? Differentiate based on SEQ_CST vs less strict? */ +- return "dmb\tsy"; ++ return "dmb\\tish"; + } + + if (TARGET_HAVE_DMB_MCR) +- return "mcr\tp15, 0, r0, c7, c10, 5"; ++ return "mcr\\tp15, 0, r0, c7, c10, 5"; + + gcc_unreachable (); + } +--- a/src/gcc/config/arm/thumb2.md ++++ b/src/gcc/config/arm/thumb2.md +@@ -300,7 +300,7 @@ + ldr%?\\t%0, %1 + str%?\\t%1, %0 + str%?\\t%1, %0" +- [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") ++ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load1,load1,store1,store1") + (set_attr "length" "2,4,2,4,4,4,4,4,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") +@@ -486,12 +486,12 @@ + ) + + (define_insn_and_split "*thumb2_movsicc_insn" +- [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r") ++ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) +- (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r") +- (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))] ++ (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,I ,r,rI,K ,K,r") ++ (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,I,K ,rI,K,r")))] + "TARGET_THUMB2" + "@ + it\\t%D3\;mov%D3\\t%0, %2 +@@ -504,12 +504,14 @@ + # + # + # ++ # + #" + ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 +- ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 +- ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 +- ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 +- ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 ++ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 ++ ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 ++ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 ++ ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 ++ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + "&& reload_completed" + [(const_int 0)] + { +@@ -540,10 +542,30 @@ + operands[2]))); + DONE; + } +- [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") +- (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") ++ [(set_attr "length" "4,4,6,6,6,6,10,8,10,10,10,6") ++ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,no,yes") + (set_attr "conds" "use") +- (set_attr "type" "multiple")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (if_then_else (match_operand 1 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (const_string "mvn_imm") ++ (if_then_else 
(match_operand 1 "const_int_operand" "") ++ (const_string "mov_imm") ++ (const_string "mov_reg")) ++ (const_string "mvn_imm") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple") ++ (const_string "multiple")])] + ) + + (define_insn "*thumb2_movsfcc_soft_insn" +@@ -1182,7 +1204,11 @@ + " + [(set_attr "predicable" "yes") + (set_attr "length" "2") +- (set_attr "type" "alu_sreg")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "alu_imm") ++ (const_string "alu_sreg")) ++ (const_string "alu_imm")])] + ) + + (define_insn "*thumb2_subsi_short" +@@ -1247,14 +1273,21 @@ + " + [(set_attr "conds" "set") + (set_attr "length" "2,2,4") +- (set_attr "type" "alu_sreg")] ++ (set_attr_alternative "type" ++ [(if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "alus_imm") ++ (const_string "alus_sreg")) ++ (const_string "alus_imm") ++ (if_then_else (match_operand 2 "const_int_operand" "") ++ (const_string "alus_imm") ++ (const_string "alus_sreg"))])] + ) + + (define_insn "*thumb2_addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV +- (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r") +- (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r")) ++ (plus:SI (match_operand:SI 0 "s_register_operand" "l, r") ++ (match_operand:SI 1 "arm_add_operand" "lPv,rIL")) + (const_int 0)))] + "TARGET_THUMB2" + "* +@@ -1271,8 +1304,10 @@ + return \"cmn\\t%0, %1\"; + " + [(set_attr "conds" "set") +- (set_attr "length" "2,2,4,4") +- (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_sreg")] ++ (set_attr "length" "2,4") ++ (set (attr "type") (if_then_else (match_operand 1 "const_int_operand" "") ++ (const_string "alus_imm") ++ (const_string "alus_sreg")))] + ) + + (define_insn "*thumb2_mulsi_short" +--- a/src/gcc/config/arm/unknown-elf.h ++++ b/src/gcc/config/arm/unknown-elf.h +@@ -32,7 +32,9 @@ + #define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" + + #undef STARTFILE_SPEC +-#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC ++#define STARTFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ ++ UNKNOWN_ELF_STARTFILE_SPEC + + #define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s" + +@@ -80,7 +82,9 @@ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ +- fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \ ++ fprintf (FILE, "\t.space\t%d\n", SIZE ? (int) SIZE : 1); \ ++ fprintf (FILE, "\t.size\t%s, %d\n", \ ++ NAME, SIZE ? (int) SIZE : 1); \ + } \ + while (0) + +--- a/src/gcc/config/arm/vxworks.h ++++ b/src/gcc/config/arm/vxworks.h +@@ -40,7 +40,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + builtin_define ("CPU=ARMARCH5"); \ + else if (arm_arch4) \ + { \ +- if (thumb_code) \ ++ if (TARGET_THUMB) \ + builtin_define ("CPU=ARMARCH4_T"); \ + else \ + builtin_define ("CPU=ARMARCH4"); \ +--- a/src/gcc/config/c6x/c6x.c ++++ b/src/gcc/config/c6x/c6x.c +@@ -3532,7 +3532,7 @@ try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs, + best_reg = + find_rename_reg (this_head, super_class, &unavailable, old_reg, true); + +- regrename_do_replace (this_head, best_reg); ++ gcc_assert (regrename_do_replace (this_head, best_reg)); + + count_unit_reqs (new_reqs, head, PREV_INSN (tail)); + merge_unit_reqs (new_reqs); +@@ -3545,7 +3545,7 @@ try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs, + unit_req_imbalance (reqs), unit_req_imbalance (new_reqs)); + } + if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs)) +- regrename_do_replace (this_head, old_reg); ++ gcc_assert (regrename_do_replace (this_head, old_reg)); + else + memcpy (reqs, new_reqs, sizeof (unit_req_table)); + +--- a/src/gcc/config/glibc-stdint.h ++++ b/src/gcc/config/glibc-stdint.h +@@ -22,6 +22,12 @@ a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + ++/* Systems using musl libc should use this header and make sure ++ OPTION_MUSL is defined correctly before using the TYPE macros. */ ++#ifndef OPTION_MUSL ++#define OPTION_MUSL 0 ++#endif + -+/* This will be called by the spec parser in gcc.c when it sees -+ a %:local_cpu_detect(args) construct. Currently it will be called -+ with either "arch", "cpu" or "tune" as argument depending on if -+ -march=native, -mcpu=native or -mtune=native is to be substituted. -+ -+ It returns a string containing new command line parameters to be -+ put at the place of the above two options, depending on what CPU -+ this is executed. E.g. "-march=armv8-a" on a Cortex-A57 for -+ -march=native. If the routine can't detect a known processor, -+ the -march or -mtune option is discarded. -+ -+ For -mtune and -mcpu arguments it attempts to detect the CPU or -+ a big.LITTLE system. -+ ARGC and ARGV are set depending on the actual arguments given -+ in the spec. */ -+ -+const char * -+host_detect_local_cpu (int argc, const char **argv) -+{ -+ const char *arch_id = NULL; -+ const char *res = NULL; -+ static const int num_exts = ARRAY_SIZE (ext_to_feat_string); -+ char buf[128]; -+ FILE *f = NULL; -+ bool arch = false; -+ bool tune = false; -+ bool cpu = false; -+ unsigned int i = 0; -+ unsigned int core_idx = 0; -+ const char* imps[2] = { NULL, NULL }; -+ const char* cores[2] = { NULL, NULL }; -+ unsigned int n_cores = 0; -+ unsigned int n_imps = 0; -+ bool processed_exts = false; -+ const char *ext_string = ""; -+ -+ gcc_assert (argc); -+ -+ if (!argv[0]) -+ goto not_found; -+ -+ /* Are we processing -march, mtune or mcpu? */ -+ arch = strcmp (argv[0], "arch") == 0; -+ if (!arch) -+ tune = strcmp (argv[0], "tune") == 0; -+ -+ if (!arch && !tune) -+ cpu = strcmp (argv[0], "cpu") == 0; -+ -+ if (!arch && !tune && !cpu) -+ goto not_found; -+ -+ f = fopen ("/proc/cpuinfo", "r"); -+ -+ if (f == NULL) -+ goto not_found; -+ -+ /* Look through /proc/cpuinfo to determine the implementer -+ and then the part number that identifies a particular core. 
*/ -+ while (fgets (buf, sizeof (buf), f) != NULL) -+ { -+ if (strstr (buf, "implementer") != NULL) -+ { -+ for (i = 0; cpu_data[i].name != NULL; i++) -+ if (strstr (buf, cpu_data[i].implementer_id) != NULL -+ && !contains_string_p (imps, cpu_data[i].implementer_id)) -+ { -+ if (n_imps == 2) -+ goto not_found; -+ -+ imps[n_imps++] = cpu_data[i].implementer_id; -+ -+ break; -+ } -+ continue; -+ } -+ -+ if (strstr (buf, "part") != NULL) -+ { -+ for (i = 0; cpu_data[i].name != NULL; i++) -+ if (strstr (buf, cpu_data[i].part_no) != NULL -+ && !contains_string_p (cores, cpu_data[i].part_no)) -+ { -+ if (n_cores == 2) -+ goto not_found; -+ -+ cores[n_cores++] = cpu_data[i].part_no; -+ core_idx = i; -+ arch_id = cpu_data[i].arch; -+ break; -+ } -+ continue; -+ } -+ if (!tune && !processed_exts && strstr (buf, "Features") != NULL) -+ { -+ for (i = 0; i < num_exts; i++) -+ { -+ bool enabled = true; -+ char *p = NULL; -+ char *feat_string = concat (ext_to_feat_string[i].feat_string, NULL); -+ -+ p = strtok (feat_string, " "); -+ -+ while (p != NULL) -+ { -+ if (strstr (buf, p) == NULL) -+ { -+ enabled = false; -+ break; -+ } -+ p = strtok (NULL, " "); -+ } -+ ext_string = concat (ext_string, "+", enabled ? "" : "no", -+ ext_to_feat_string[i].ext, NULL); -+ } -+ processed_exts = true; -+ } -+ } -+ -+ fclose (f); -+ f = NULL; -+ -+ /* Weird cpuinfo format that we don't know how to handle. */ -+ if (n_cores == 0 || n_cores > 2 || n_imps != 1) -+ goto not_found; -+ -+ if (arch && !arch_id) -+ goto not_found; -+ -+ if (arch) -+ { -+ const char* arch_name = get_arch_name_from_id (arch_id); -+ -+ /* We got some arch indentifier that's not in aarch64-arches.def? */ -+ if (!arch_name) -+ goto not_found; -+ -+ res = concat ("-march=", arch_name, NULL); -+ } -+ /* We have big.LITTLE. */ -+ else if (n_cores == 2) -+ { -+ for (i = 0; cpu_data[i].name != NULL; i++) -+ { -+ if (strchr (cpu_data[i].part_no, '.') != NULL -+ && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0 -+ && valid_bL_string_p (cores, cpu_data[i].part_no)) -+ { -+ res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL); -+ break; -+ } -+ } -+ if (!res) -+ goto not_found; -+ } -+ /* The simple, non-big.LITTLE case. */ -+ else -+ { -+ if (strncmp (cpu_data[core_idx].implementer_id, imps[0], -+ strlen (imps[0]) - 1) != 0) -+ goto not_found; + #define SIG_ATOMIC_TYPE "int" + + #define INT8_TYPE "signed char" +@@ -43,12 +49,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") + + #define INT_FAST8_TYPE "signed char" +-#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") +-#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") ++#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") ++#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") + #define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int") + #define UINT_FAST8_TYPE "unsigned char" +-#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") +-#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") ++#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") ++#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") + #define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? 
"long unsigned int" : "long long unsigned int") + + #define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") +--- a/src/gcc/config/linux.h ++++ b/src/gcc/config/linux.h +@@ -32,10 +32,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) + #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) + #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) + #else + #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) + #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) + #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (linux_libc == LIBC_MUSL) + #endif + + #define GNU_USER_TARGET_OS_CPP_BUILTINS() \ +@@ -50,21 +54,25 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + } while (0) + + /* Determine which dynamic linker to use depending on whether GLIBC or +- uClibc or Bionic is the default C library and whether +- -muclibc or -mglibc or -mbionic has been passed to change the default. */ ++ uClibc or Bionic or musl is the default C library and whether ++ -muclibc or -mglibc or -mbionic or -mmusl has been passed to change ++ the default. */ + +-#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LD1, LD2, LD3) \ +- "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:" LD1 "}}" ++#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LIBC4, LD1, LD2, LD3, LD4) \ ++ "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:%{" LIBC4 ":" LD4 ";:" LD1 "}}}" + + #if DEFAULT_LIBC == LIBC_GLIBC +-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ +- CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", G, U, B) ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", "mmusl", G, U, B, M) + #elif DEFAULT_LIBC == LIBC_UCLIBC +-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ +- CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", U, G, B) ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", "mmusl", U, G, B, M) + #elif DEFAULT_LIBC == LIBC_BIONIC +-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ +- CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", B, G, U) ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", "mmusl", B, G, U, M) ++#elif DEFAULT_LIBC == LIBC_MUSL ++#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ ++ CHOOSE_DYNAMIC_LINKER1 ("mmusl", "mglibc", "muclibc", "mbionic", M, G, U, B) + #else + #error "Unsupported DEFAULT_LIBC" + #endif /* DEFAULT_LIBC */ +@@ -81,24 +89,100 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker" + #define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64" + #define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32" ++/* Should be redefined for each target that supports musl. 
*/ ++#define MUSL_DYNAMIC_LINKER "/dev/null" ++#define MUSL_DYNAMIC_LINKER32 "/dev/null" ++#define MUSL_DYNAMIC_LINKER64 "/dev/null" ++#define MUSL_DYNAMIC_LINKERX32 "/dev/null" + + #define GNU_USER_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \ +- BIONIC_DYNAMIC_LINKER) ++ BIONIC_DYNAMIC_LINKER, MUSL_DYNAMIC_LINKER) + #define GNU_USER_DYNAMIC_LINKER32 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \ +- BIONIC_DYNAMIC_LINKER32) ++ BIONIC_DYNAMIC_LINKER32, MUSL_DYNAMIC_LINKER32) + #define GNU_USER_DYNAMIC_LINKER64 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \ +- BIONIC_DYNAMIC_LINKER64) ++ BIONIC_DYNAMIC_LINKER64, MUSL_DYNAMIC_LINKER64) + #define GNU_USER_DYNAMIC_LINKERX32 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \ +- BIONIC_DYNAMIC_LINKERX32) ++ BIONIC_DYNAMIC_LINKERX32, MUSL_DYNAMIC_LINKERX32) + + /* Whether we have Bionic libc runtime */ + #undef TARGET_HAS_BIONIC + #define TARGET_HAS_BIONIC (OPTION_BIONIC) + ++/* musl avoids problematic includes by rearranging the include directories. ++ * Unfortunately, this is mostly duplicated from cppdefault.c */ ++#if DEFAULT_LIBC == LIBC_MUSL ++#define INCLUDE_DEFAULTS_MUSL_GPP \ ++ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, \ ++ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, \ ++ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, \ ++ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 }, \ ++ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, \ ++ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, + -+ res = concat ("-m", cpu ? "cpu" : "tune", "=", -+ cpu_data[core_idx].name, NULL); -+ } ++#ifdef LOCAL_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_LOCAL \ ++ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, \ ++ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 }, ++#else ++#define INCLUDE_DEFAULTS_MUSL_LOCAL ++#endif + -+ if (tune) -+ return res; ++#ifdef PREFIX_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_PREFIX \ ++ { PREFIX_INCLUDE_DIR, 0, 0, 1, 0, 0}, ++#else ++#define INCLUDE_DEFAULTS_MUSL_PREFIX ++#endif + -+ res = concat (res, ext_string, NULL); ++#ifdef CROSS_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_CROSS \ ++ { CROSS_INCLUDE_DIR, "GCC", 0, 0, 0, 0}, ++#else ++#define INCLUDE_DEFAULTS_MUSL_CROSS ++#endif + -+ return res; ++#ifdef TOOL_INCLUDE_DIR ++#define INCLUDE_DEFAULTS_MUSL_TOOL \ ++ { TOOL_INCLUDE_DIR, "BINUTILS", 0, 1, 0, 0}, ++#else ++#define INCLUDE_DEFAULTS_MUSL_TOOL ++#endif + -+not_found: -+ { -+ /* If detection fails we ignore the option. -+ Clean up and return empty string. 
*/ ++#ifdef NATIVE_SYSTEM_HEADER_DIR ++#define INCLUDE_DEFAULTS_MUSL_NATIVE \ ++ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 2 }, \ ++ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 0 }, ++#else ++#define INCLUDE_DEFAULTS_MUSL_NATIVE ++#endif + -+ if (f) -+ fclose (f); ++#if defined (CROSS_DIRECTORY_STRUCTURE) && !defined (TARGET_SYSTEM_ROOT) ++# undef INCLUDE_DEFAULTS_MUSL_LOCAL ++# define INCLUDE_DEFAULTS_MUSL_LOCAL ++# undef INCLUDE_DEFAULTS_MUSL_NATIVE ++# define INCLUDE_DEFAULTS_MUSL_NATIVE ++#else ++# undef INCLUDE_DEFAULTS_MUSL_CROSS ++# define INCLUDE_DEFAULTS_MUSL_CROSS ++#endif + -+ return ""; ++#undef INCLUDE_DEFAULTS ++#define INCLUDE_DEFAULTS \ ++ { \ ++ INCLUDE_DEFAULTS_MUSL_GPP \ ++ INCLUDE_DEFAULTS_MUSL_PREFIX \ ++ INCLUDE_DEFAULTS_MUSL_CROSS \ ++ INCLUDE_DEFAULTS_MUSL_TOOL \ ++ INCLUDE_DEFAULTS_MUSL_NATIVE \ ++ { GCC_INCLUDE_DIR, "GCC", 0, 1, 0, 0 }, \ ++ { 0, 0, 0, 0, 0, 0 } \ + } -+} ++#endif + ---- a/src//dev/null -+++ b/src/gcc/config/aarch64/x-aarch64 -@@ -0,0 +1,3 @@ -+driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.c \ -+ $(CONFIG_H) $(SYSTEM_H) -+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ---- a/src/gcc/config/alpha/linux.h -+++ b/src/gcc/config/alpha/linux.h -@@ -61,10 +61,14 @@ along with GCC; see the file COPYING3. If not see + #if (DEFAULT_LIBC == LIBC_UCLIBC) && defined (SINGLE_LIBC) /* uClinux */ + /* This is a *uclinux* target. We don't define below macros to normal linux + versions, because doing so would require *uclinux* targets to include +--- a/src/gcc/config/linux.opt ++++ b/src/gcc/config/linux.opt +@@ -28,5 +28,9 @@ Target Report RejectNegative Var(linux_libc,LIBC_GLIBC) Negative(muclibc) + Use GNU C library + + muclibc +-Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mbionic) ++Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mmusl) + Use uClibc C library ++ ++mmusl ++Target Report RejectNegative Var(linux_libc,LIBC_MUSL) Negative(mbionic) ++Use musl C library +--- a/src/gcc/config/mips/linux.h ++++ b/src/gcc/config/mips/linux.h +@@ -37,7 +37,13 @@ along with GCC; see the file COPYING3. If not see + #define UCLIBC_DYNAMIC_LINKERN32 \ + "%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}" + ++#undef MUSL_DYNAMIC_LINKER32 ++#define MUSL_DYNAMIC_LINKER32 "/lib/ld-musl-mips%{EL:el}%{msoft-float:-sf}.so.1" ++#undef MUSL_DYNAMIC_LINKER64 ++#define MUSL_DYNAMIC_LINKER64 "/lib/ld-musl-mips64%{EL:el}%{msoft-float:-sf}.so.1" ++#define MUSL_DYNAMIC_LINKERN32 "/lib/ld-musl-mipsn32%{EL:el}%{msoft-float:-sf}.so.1" ++ + #define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32" + #define GNU_USER_DYNAMIC_LINKERN32 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \ +- BIONIC_DYNAMIC_LINKERN32) ++ BIONIC_DYNAMIC_LINKERN32, MUSL_DYNAMIC_LINKERN32) +--- a/src/gcc/config/rs6000/linux.h ++++ b/src/gcc/config/rs6000/linux.h +@@ -30,10 +30,14 @@ #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) @@ -3226,5612 +12657,14406 @@ #endif /* Determine what functions are present at the runtime; ---- a/src/gcc/config/arm/aarch-common-protos.h -+++ b/src/gcc/config/arm/aarch-common-protos.h -@@ -102,6 +102,8 @@ struct mem_cost_table - const int storef; /* SFmode. */ - const int stored; /* DFmode. */ - const int store_unaligned; /* Extra for unaligned stores. */ -+ const int loadv; /* Vector load. */ -+ const int storev; /* Vector store. 
*/ - }; +--- a/src/gcc/config/rs6000/linux64.h ++++ b/src/gcc/config/rs6000/linux64.h +@@ -299,10 +299,14 @@ extern int dot_symbols; + #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) + #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) + #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) + #else + #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) + #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) + #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (linux_libc == LIBC_MUSL) + #endif - struct fp_cost_table ---- a/src/gcc/config/arm/aarch-cost-tables.h -+++ b/src/gcc/config/arm/aarch-cost-tables.h -@@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs = - 1, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (3), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -130,12 +132,12 @@ const struct cpu_cost_table cortexa53_extra_costs = - 0, /* arith. */ - 0, /* logical. */ - COSTS_N_INSNS (1), /* shift. */ -- COSTS_N_INSNS (2), /* shift_reg. */ -+ 0, /* shift_reg. */ - COSTS_N_INSNS (1), /* arith_shift. */ -- COSTS_N_INSNS (2), /* arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift_reg. */ - COSTS_N_INSNS (1), /* log_shift. */ -- COSTS_N_INSNS (2), /* log_shift_reg. */ -- 0, /* extend. */ -+ COSTS_N_INSNS (1), /* log_shift_reg. */ -+ COSTS_N_INSNS (1), /* extend. */ - COSTS_N_INSNS (1), /* extend_arith. */ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ -@@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - 0, /* storef. */ - 0, /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - 0, /* storef. */ - 0, /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs = - 0, /* storef. */ - 0, /* stored. */ - 0, /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. 
*/ - }, - { - /* FP SFmode */ ---- a/src/gcc/config/arm/arm-cores.def -+++ b/src/gcc/config/arm/arm-cores.def -@@ -158,7 +158,7 @@ ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex - ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7) - ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) - ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) --ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) -+ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4) + /* Determine what functions are present at the runtime; +--- a/src/gcc/configure ++++ b/src/gcc/configure +@@ -767,10 +767,6 @@ REPORT_BUGS_TEXI + REPORT_BUGS_TO + PKGVERSION + CONFIGURE_SPECS +-CROSS_SYSTEM_HEADER_DIR +-TARGET_SYSTEM_ROOT_DEFINE +-TARGET_SYSTEM_ROOT +-SYSROOT_CFLAGS_FOR_TARGET + enable_shared + enable_fixed_point + enable_decimal_float +@@ -809,6 +805,10 @@ LDFLAGS + CFLAGS + CC + GENINSRC ++CROSS_SYSTEM_HEADER_DIR ++TARGET_SYSTEM_ROOT_DEFINE ++TARGET_SYSTEM_ROOT ++SYSROOT_CFLAGS_FOR_TARGET + target_subdir + host_subdir + build_subdir +@@ -870,6 +870,9 @@ ac_user_opts=' + enable_option_checking + with_build_libsubdir + with_local_prefix ++with_native_system_header_dir ++with_build_sysroot ++with_sysroot + with_gxx_include_dir + with_cpp_install_dir + enable_generated_files_in_srcdir +@@ -896,9 +899,6 @@ enable_tls + enable_objc_gc + with_dwarf2 + enable_shared +-with_native_system_header_dir +-with_build_sysroot +-with_sysroot + with_specs + with_pkgversion + with_bugurl +@@ -1680,6 +1680,12 @@ Optional Packages: + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-build-libsubdir=DIR Directory where to find libraries for build system + --with-local-prefix=DIR specifies directory to put local include ++ --with-native-system-header-dir=dir ++ use dir as the directory to look for standard ++ system header files in. Defaults to /usr/include. ++ --with-build-sysroot=sysroot ++ use sysroot as the system root during the build ++ --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR + --with-gxx-include-dir=DIR + specifies directory to put g++ header files + --with-cpp-install-dir=DIR +@@ -1692,14 +1698,9 @@ Optional Packages: + --with-as arrange to use the specified as (full pathname) + --with-stabs arrange to use stabs instead of host debug format + --with-dwarf2 force the default debug format to be DWARF 2 +- --with-native-system-header-dir=dir +- use dir as the directory to look for standard +- system header files in. Defaults to /usr/include. 
+- --with-build-sysroot=sysroot +- use sysroot as the system root during the build +- --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR + --with-specs=SPECS add SPECS to driver command-line processing +- --with-pkgversion=PKG Use PKG in the version string in place of "GCC" ++ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro ++ GCC `cat $srcdir/LINARO-VERSION`" + --with-bugurl=URL Direct users to URL to report a bug + --with-multilib-list select multilibs (AArch64, SH and x86-64 only) + --with-gnu-ld assume the C compiler uses GNU ld default=no +@@ -3339,6 +3340,83 @@ if test x$local_prefix = x; then + local_prefix=/usr/local + fi - /* V7 big.LITTLE implementations */ - ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) ---- a/src/gcc/config/arm/arm-protos.h -+++ b/src/gcc/config/arm/arm-protos.h -@@ -66,10 +66,6 @@ extern rtx legitimize_tls_address (rtx, rtx); - extern bool arm_legitimate_address_p (machine_mode, rtx, bool); - extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int); - extern int thumb_legitimate_offset_p (machine_mode, HOST_WIDE_INT); --extern bool arm_legitimize_reload_address (rtx *, machine_mode, int, int, -- int); --extern rtx thumb_legitimize_reload_address (rtx *, machine_mode, int, int, -- int); - extern int thumb1_legitimate_address_p (machine_mode, rtx, int); - extern bool ldm_stm_operation_p (rtx, bool, machine_mode mode, - bool, bool); -@@ -257,13 +253,6 @@ struct cpu_vec_costs { ++ ++# Check whether --with-native-system-header-dir was given. ++if test "${with_native_system_header_dir+set}" = set; then : ++ withval=$with_native_system_header_dir; ++ case ${with_native_system_header_dir} in ++ yes|no) as_fn_error "bad value ${withval} given for --with-native-system-header-dir" "$LINENO" 5 ;; ++ /* | [A-Za-z]:[\\/]*) ;; ++ *) as_fn_error "--with-native-system-header-dir argument ${withval} must be an absolute directory" "$LINENO" 5 ;; ++ esac ++ configured_native_system_header_dir="${withval}" ++ ++else ++ configured_native_system_header_dir= ++fi ++ ++ ++ ++# Check whether --with-build-sysroot was given. ++if test "${with_build_sysroot+set}" = set; then : ++ withval=$with_build_sysroot; if test x"$withval" != x ; then ++ SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" ++ fi ++else ++ SYSROOT_CFLAGS_FOR_TARGET= ++fi ++ ++ ++ ++if test "x$prefix" = xNONE; then ++ test_prefix=/usr/local ++else ++ test_prefix=$prefix ++fi ++if test "x$exec_prefix" = xNONE; then ++ test_exec_prefix=$test_prefix ++else ++ test_exec_prefix=$exec_prefix ++fi ++ ++ ++# Check whether --with-sysroot was given. 
++if test "${with_sysroot+set}" = set; then : ++ withval=$with_sysroot; ++ case ${with_sysroot} in ++ /) ;; ++ */) with_sysroot=`echo $with_sysroot | sed 's,/$,,'` ;; ++ esac ++ case ${with_sysroot} in ++ yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; ++ *) TARGET_SYSTEM_ROOT=$with_sysroot ;; ++ esac ++ ++ TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' ++ CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' ++ ++ case ${TARGET_SYSTEM_ROOT} in ++ "${test_prefix}"|"${test_prefix}/"*|\ ++ "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ ++ '${prefix}'|'${prefix}/'*|\ ++ '${exec_prefix}'|'${exec_prefix}/'*) ++ t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" ++ TARGET_SYSTEM_ROOT_DEFINE="$t" ++ ;; ++ esac ++ ++else ++ ++ TARGET_SYSTEM_ROOT= ++ TARGET_SYSTEM_ROOT_DEFINE= ++ CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' ++ ++fi ++ ++ ++ ++ ++ + # Don't set gcc_gxx_include_dir to gxx_include_dir since that's only + # passed in by the toplevel make and thus we'd get different behavior + # depending on where we built the sources. +@@ -3372,7 +3450,9 @@ gcc_gxx_include_dir_add_sysroot=0 + if test "${with_sysroot+set}" = set; then + gcc_gxx_without_sysroot=`expr "${gcc_gxx_include_dir}" : "${with_sysroot}"'\(.*\)'` + if test "${gcc_gxx_without_sysroot}"; then +- gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ if test x${with_sysroot} != x/; then ++ gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ fi + gcc_gxx_include_dir_add_sysroot=1 + fi + fi +@@ -7269,79 +7349,6 @@ fi - struct cpu_cost_table; --enum arm_sched_autopref -- { -- ARM_SCHED_AUTOPREF_OFF, -- ARM_SCHED_AUTOPREF_RANK, -- ARM_SCHED_AUTOPREF_FULL -- }; + +-# Check whether --with-native-system-header-dir was given. +-if test "${with_native_system_header_dir+set}" = set; then : +- withval=$with_native_system_header_dir; +- case ${with_native_system_header_dir} in +- yes|no) as_fn_error "bad value ${withval} given for --with-native-system-header-dir" "$LINENO" 5 ;; +- /* | [A-Za-z]:[\\/]*) ;; +- *) as_fn_error "--with-native-system-header-dir argument ${withval} must be an absolute directory" "$LINENO" 5 ;; +- esac +- configured_native_system_header_dir="${withval}" - - /* Dump function ARM_PRINT_TUNE_INFO should be updated whenever this - structure is modified. */ +-else +- configured_native_system_header_dir= +-fi +- +- +- +-# Check whether --with-build-sysroot was given. +-if test "${with_build_sysroot+set}" = set; then : +- withval=$with_build_sysroot; if test x"$withval" != x ; then +- SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" +- fi +-else +- SYSROOT_CFLAGS_FOR_TARGET= +-fi +- +- +- +-if test "x$prefix" = xNONE; then +- test_prefix=/usr/local +-else +- test_prefix=$prefix +-fi +-if test "x$exec_prefix" = xNONE; then +- test_exec_prefix=$test_prefix +-else +- test_exec_prefix=$exec_prefix +-fi +- +- +-# Check whether --with-sysroot was given. 
+-if test "${with_sysroot+set}" = set; then : +- withval=$with_sysroot; +- case ${with_sysroot} in +- yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; +- *) TARGET_SYSTEM_ROOT=$with_sysroot ;; +- esac +- +- TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' +- CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' +- +- case ${TARGET_SYSTEM_ROOT} in +- "${test_prefix}"|"${test_prefix}/"*|\ +- "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ +- '${prefix}'|'${prefix}/'*|\ +- '${exec_prefix}'|'${exec_prefix}/'*) +- t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" +- TARGET_SYSTEM_ROOT_DEFINE="$t" +- ;; +- esac +- +-else +- +- TARGET_SYSTEM_ROOT= +- TARGET_SYSTEM_ROOT_DEFINE= +- CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' +- +-fi +- +- +- +- +- +- + # Check whether --with-specs was given. + if test "${with_specs+set}" = set; then : + withval=$with_specs; CONFIGURE_SPECS=$withval +@@ -7362,7 +7369,7 @@ if test "${with_pkgversion+set}" = set; then : + *) PKGVERSION="($withval) " ;; + esac + else +- PKGVERSION="(GCC) " ++ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " -@@ -272,39 +261,57 @@ struct tune_params - bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); - const struct cpu_cost_table *insn_extra_cost; - bool (*sched_adjust_cost) (rtx_insn *, rtx, rtx_insn *, int *); -+ int (*branch_cost) (bool, bool); -+ /* Vectorizer costs. */ -+ const struct cpu_vec_costs* vec_costs; - int constant_limit; - /* Maximum number of instructions to conditionalise. */ - int max_insns_skipped; -- int num_prefetch_slots; -- int l1_cache_size; -- int l1_cache_line_size; -- bool prefer_constant_pool; -- int (*branch_cost) (bool, bool); -+ /* Maximum number of instructions to inline calls to memset. */ -+ int max_insns_inline_memset; -+ /* Issue rate of the processor. */ -+ unsigned int issue_rate; -+ /* Explicit prefetch data. */ -+ struct -+ { -+ int num_slots; -+ int l1_cache_size; -+ int l1_cache_line_size; -+ } prefetch; -+ enum {PREF_CONST_POOL_FALSE, PREF_CONST_POOL_TRUE} -+ prefer_constant_pool: 1; - /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ -- bool prefer_ldrd_strd; -+ enum {PREF_LDRD_FALSE, PREF_LDRD_TRUE} prefer_ldrd_strd: 1; - /* The preference for non short cirtcuit operation when optimizing for - performance. The first element covers Thumb state and the second one - is for ARM state. */ -- bool logical_op_non_short_circuit[2]; -- /* Vectorizer costs. */ -- const struct cpu_vec_costs* vec_costs; -- /* Prefer Neon for 64-bit bitops. */ -- bool prefer_neon_for_64bits; -+ enum log_op_non_sc {LOG_OP_NON_SC_FALSE, LOG_OP_NON_SC_TRUE}; -+ log_op_non_sc logical_op_non_short_circuit_thumb: 1; -+ log_op_non_sc logical_op_non_short_circuit_arm: 1; - /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ -- bool disparage_flag_setting_t16_encodings; -- /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags -- would be set. */ -- bool disparage_partial_flag_setting_t16_encodings; -+ enum {DISPARAGE_FLAGS_NEITHER, DISPARAGE_FLAGS_PARTIAL, DISPARAGE_FLAGS_ALL} -+ disparage_flag_setting_t16_encodings: 2; -+ enum {PREF_NEON_64_FALSE, PREF_NEON_64_TRUE} prefer_neon_for_64bits: 1; - /* Prefer to inline string operations like memset by using Neon. */ -- bool string_ops_prefer_neon; -- /* Maximum number of instructions to inline calls to memset. 
*/ -- int max_insns_inline_memset; -- /* Bitfield encoding the fuseable pairs of instructions. */ -- unsigned int fuseable_ops; -+ enum {PREF_NEON_STRINGOPS_FALSE, PREF_NEON_STRINGOPS_TRUE} -+ string_ops_prefer_neon: 1; -+ /* Bitfield encoding the fuseable pairs of instructions. Use FUSE_OPS -+ in an initializer if multiple fusion operations are supported on a -+ target. */ -+ enum fuse_ops -+ { -+ FUSE_NOTHING = 0, -+ FUSE_MOVW_MOVT = 1 << 0 -+ } fuseable_ops: 1; - /* Depth of scheduling queue to check for L2 autoprefetcher. */ -- enum arm_sched_autopref sched_autopref; -+ enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL} -+ sched_autopref: 2; - }; + fi -+/* Smash multiple fusion operations into a type that can be used for an -+ initializer. */ -+#define FUSE_OPS(x) ((tune_params::fuse_ops) (x)) -+ - extern const struct tune_params *current_tune; - extern int vfp3_const_double_for_fract_bits (rtx); - /* return power of two from operand, otherwise 0. */ ---- a/src/gcc/config/arm/arm.c -+++ b/src/gcc/config/arm/arm.c -@@ -940,11 +940,13 @@ struct processors - }; +@@ -18162,7 +18169,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 18165 "configure" ++#line 18172 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -18268,7 +18275,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 18271 "configure" ++#line 18278 "configure" + #include "confdefs.h" + #if HAVE_DLFCN_H +@@ -27802,6 +27809,9 @@ if test "${gcc_cv_libc_provides_ssp+set}" = set; then : + else + gcc_cv_libc_provides_ssp=no + case "$target" in ++ *-*-musl*) ++ # All versions of musl provide stack protector ++ gcc_cv_libc_provides_ssp=yes;; + *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) + # glibc 2.4 and later provides __stack_chk_fail and + # either __stack_chk_guard, or TLS access to stack guard canary. +@@ -27834,6 +27844,7 @@ fi + # ) and for now + # simply assert that glibc does provide this, which is true for all + # realistically usable GNU/Hurd configurations. ++ # All supported versions of musl provide it as well + gcc_cv_libc_provides_ssp=yes;; + *-*-darwin* | *-*-freebsd*) + ac_fn_c_check_func "$LINENO" "__stack_chk_fail" "ac_cv_func___stack_chk_fail" +@@ -27930,6 +27941,9 @@ case "$target" in + gcc_cv_target_dl_iterate_phdr=no + fi + ;; ++ *-linux-musl*) ++ gcc_cv_target_dl_iterate_phdr=yes ++ ;; + esac --#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 --#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ -- prefetch_slots, \ -- l1_size, \ -- l1_line_size -+#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 } -+#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \ -+ { \ -+ num_slots, \ -+ l1_size, \ -+ l1_line_size \ -+ } + if test x$gcc_cv_target_dl_iterate_phdr = xyes; then +--- a/src/gcc/configure.ac ++++ b/src/gcc/configure.ac +@@ -121,6 +121,73 @@ if test x$local_prefix = x; then + local_prefix=/usr/local + fi - /* arm generic vectorizer costs. */ - static const -@@ -1027,7 +1029,9 @@ const struct cpu_cost_table cortexa9_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (1), /* storef. */ - COSTS_N_INSNS (1), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. 
*/ - }, - { - /* FP SFmode */ -@@ -1128,7 +1132,9 @@ const struct cpu_cost_table cortexa8_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (1), /* storef. */ - COSTS_N_INSNS (1), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1230,7 +1236,9 @@ const struct cpu_cost_table cortexa5_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (2), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1333,7 +1341,9 @@ const struct cpu_cost_table cortexa7_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (2), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1434,7 +1444,9 @@ const struct cpu_cost_table cortexa12_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (2), /* stored. */ -- 0 /* store_unaligned. */ -+ 0, /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1535,7 +1547,9 @@ const struct cpu_cost_table cortexa15_extra_costs = - 2, /* stm_regs_per_insn_subsequent. */ - 0, /* storef. */ - 0, /* stored. */ -- 0 /* store_unaligned. */ -+ 0, /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1636,7 +1650,9 @@ const struct cpu_cost_table v7m_extra_costs = - 1, /* stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* storef. */ - COSTS_N_INSNS (3), /* stored. */ -- COSTS_N_INSNS (1) /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ - }, - { - /* FP SFmode */ -@@ -1678,49 +1694,50 @@ const struct cpu_cost_table v7m_extra_costs = - } - }; ++AC_ARG_WITH([native-system-header-dir], ++ [ --with-native-system-header-dir=dir ++ use dir as the directory to look for standard ++ system header files in. 
Defaults to /usr/include.], ++[ ++ case ${with_native_system_header_dir} in ++ yes|no) AC_MSG_ERROR([bad value ${withval} given for --with-native-system-header-dir]) ;; ++ /* | [[A-Za-z]]:[[\\/]]*) ;; ++ *) AC_MSG_ERROR([--with-native-system-header-dir argument ${withval} must be an absolute directory]) ;; ++ esac ++ configured_native_system_header_dir="${withval}" ++], [configured_native_system_header_dir=]) ++ ++AC_ARG_WITH(build-sysroot, ++ [AS_HELP_STRING([--with-build-sysroot=sysroot], ++ [use sysroot as the system root during the build])], ++ [if test x"$withval" != x ; then ++ SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" ++ fi], ++ [SYSROOT_CFLAGS_FOR_TARGET=]) ++AC_SUBST(SYSROOT_CFLAGS_FOR_TARGET) ++ ++if test "x$prefix" = xNONE; then ++ test_prefix=/usr/local ++else ++ test_prefix=$prefix ++fi ++if test "x$exec_prefix" = xNONE; then ++ test_exec_prefix=$test_prefix ++else ++ test_exec_prefix=$exec_prefix ++fi ++ ++AC_ARG_WITH(sysroot, ++[AS_HELP_STRING([[--with-sysroot[=DIR]]], ++ [search for usr/lib, usr/include, et al, within DIR])], ++[ ++ case ${with_sysroot} in ++ /) ;; ++ */) with_sysroot=`echo $with_sysroot | sed 's,/$,,'` ;; ++ esac ++ case ${with_sysroot} in ++ yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; ++ *) TARGET_SYSTEM_ROOT=$with_sysroot ;; ++ esac ++ ++ TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' ++ CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' ++ ++ case ${TARGET_SYSTEM_ROOT} in ++ "${test_prefix}"|"${test_prefix}/"*|\ ++ "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ ++ '${prefix}'|'${prefix}/'*|\ ++ '${exec_prefix}'|'${exec_prefix}/'*) ++ t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" ++ TARGET_SYSTEM_ROOT_DEFINE="$t" ++ ;; ++ esac ++], [ ++ TARGET_SYSTEM_ROOT= ++ TARGET_SYSTEM_ROOT_DEFINE= ++ CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' ++]) ++AC_SUBST(TARGET_SYSTEM_ROOT) ++AC_SUBST(TARGET_SYSTEM_ROOT_DEFINE) ++AC_SUBST(CROSS_SYSTEM_HEADER_DIR) ++ + # Don't set gcc_gxx_include_dir to gxx_include_dir since that's only + # passed in by the toplevel make and thus we'd get different behavior + # depending on where we built the sources. +@@ -152,7 +219,9 @@ gcc_gxx_include_dir_add_sysroot=0 + if test "${with_sysroot+set}" = set; then + gcc_gxx_without_sysroot=`expr "${gcc_gxx_include_dir}" : "${with_sysroot}"'\(.*\)'` + if test "${gcc_gxx_without_sysroot}"; then +- gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ if test x${with_sysroot} != x/; then ++ gcc_gxx_include_dir="${gcc_gxx_without_sysroot}" ++ fi + gcc_gxx_include_dir_add_sysroot=1 + fi + fi +@@ -791,69 +860,6 @@ AC_ARG_ENABLE(shared, + ], [enable_shared=yes]) + AC_SUBST(enable_shared) + +-AC_ARG_WITH([native-system-header-dir], +- [ --with-native-system-header-dir=dir +- use dir as the directory to look for standard +- system header files in. 
Defaults to /usr/include.], +-[ +- case ${with_native_system_header_dir} in +- yes|no) AC_MSG_ERROR([bad value ${withval} given for --with-native-system-header-dir]) ;; +- /* | [[A-Za-z]]:[[\\/]]*) ;; +- *) AC_MSG_ERROR([--with-native-system-header-dir argument ${withval} must be an absolute directory]) ;; +- esac +- configured_native_system_header_dir="${withval}" +-], [configured_native_system_header_dir=]) +- +-AC_ARG_WITH(build-sysroot, +- [AS_HELP_STRING([--with-build-sysroot=sysroot], +- [use sysroot as the system root during the build])], +- [if test x"$withval" != x ; then +- SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval" +- fi], +- [SYSROOT_CFLAGS_FOR_TARGET=]) +-AC_SUBST(SYSROOT_CFLAGS_FOR_TARGET) +- +-if test "x$prefix" = xNONE; then +- test_prefix=/usr/local +-else +- test_prefix=$prefix +-fi +-if test "x$exec_prefix" = xNONE; then +- test_exec_prefix=$test_prefix +-else +- test_exec_prefix=$exec_prefix +-fi +- +-AC_ARG_WITH(sysroot, +-[AS_HELP_STRING([[--with-sysroot[=DIR]]], +- [search for usr/lib, usr/include, et al, within DIR])], +-[ +- case ${with_sysroot} in +- yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; +- *) TARGET_SYSTEM_ROOT=$with_sysroot ;; +- esac +- +- TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' +- CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' +- +- case ${TARGET_SYSTEM_ROOT} in +- "${test_prefix}"|"${test_prefix}/"*|\ +- "${test_exec_prefix}"|"${test_exec_prefix}/"*|\ +- '${prefix}'|'${prefix}/'*|\ +- '${exec_prefix}'|'${exec_prefix}/'*) +- t="$TARGET_SYSTEM_ROOT_DEFINE -DTARGET_SYSTEM_ROOT_RELOCATABLE" +- TARGET_SYSTEM_ROOT_DEFINE="$t" +- ;; +- esac +-], [ +- TARGET_SYSTEM_ROOT= +- TARGET_SYSTEM_ROOT_DEFINE= +- CROSS_SYSTEM_HEADER_DIR='$(gcc_tooldir)/sys-include' +-]) +-AC_SUBST(TARGET_SYSTEM_ROOT) +-AC_SUBST(TARGET_SYSTEM_ROOT_DEFINE) +-AC_SUBST(CROSS_SYSTEM_HEADER_DIR) +- + AC_ARG_WITH(specs, + [AS_HELP_STRING([--with-specs=SPECS], + [add SPECS to driver command-line processing])], +@@ -862,7 +868,7 @@ AC_ARG_WITH(specs, + ) + AC_SUBST(CONFIGURE_SPECS) --#define ARM_FUSE_NOTHING (0) --#define ARM_FUSE_MOVW_MOVT (1 << 0) -- - const struct tune_params arm_slowmul_tune = - { - arm_slowmul_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 3, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. 
*/ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +-ACX_PKGVERSION([GCC]) ++ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) + ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - const struct tune_params arm_fastmul_tune = - { - arm_fastmul_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + # Sanity check enable_languages in case someone does not run the toplevel +@@ -5282,6 +5288,9 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, + gcc_cv_libc_provides_ssp, + [gcc_cv_libc_provides_ssp=no + case "$target" in ++ *-*-musl*) ++ # All versions of musl provide stack protector ++ gcc_cv_libc_provides_ssp=yes;; + *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) + # glibc 2.4 and later provides __stack_chk_fail and + # either __stack_chk_guard, or TLS access to stack guard canary. +@@ -5308,6 +5317,7 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, + # ) and for now + # simply assert that glibc does provide this, which is true for all + # realistically usable GNU/Hurd configurations. ++ # All supported versions of musl provide it as well + gcc_cv_libc_provides_ssp=yes;; + *-*-darwin* | *-*-freebsd*) + AC_CHECK_FUNC(__stack_chk_fail,[gcc_cv_libc_provides_ssp=yes], +@@ -5381,6 +5391,9 @@ case "$target" in + gcc_cv_target_dl_iterate_phdr=no + fi + ;; ++ *-linux-musl*) ++ gcc_cv_target_dl_iterate_phdr=yes ++ ;; + esac + GCC_TARGET_TEMPLATE([TARGET_DL_ITERATE_PHDR]) + if test x$gcc_cv_target_dl_iterate_phdr = xyes; then +--- a/src/gcc/cp/Make-lang.in ++++ b/src/gcc/cp/Make-lang.in +@@ -155,7 +155,7 @@ check-c++-subtargets : check-g++-subtargets + # List of targets that can use the generic check- rule and its // variant. + lang_checks += check-g++ + lang_checks_parallelized += check-g++ +-# For description see comment above check_gcc_parallelize in gcc/Makefile.in. ++# For description see the check_$lang_parallelize comment in gcc/Makefile.in. 
+ check_g++_parallelize = 10000 + # + # Install hooks: +@@ -221,6 +221,7 @@ c++.mostlyclean: + -rm -f doc/g++.1 + -rm -f cp/*$(objext) + -rm -f cp/*$(coverageexts) ++ -rm -f xg++$(exeext) g++-cross$(exeext) cc1plus$(exeext) + c++.clean: + c++.distclean: + -rm -f cp/config.status cp/Makefile +--- a/src/gcc/cppbuiltin.c ++++ b/src/gcc/cppbuiltin.c +@@ -62,18 +62,41 @@ parse_basever (int *major, int *minor, int *patchlevel) + *patchlevel = s_patchlevel; + } + ++/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]" ++ to create Linaro release number YYYYMM and spin version. */ ++static void ++parse_linarover (int *release, int *spin) ++{ ++ static int s_year = -1, s_month, s_spin; ++ ++ if (s_year == -1) ++ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3) ++ { ++ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month); ++ s_spin = 0; ++ } ++ ++ if (release) ++ *release = s_year * 100 + s_month; ++ ++ if (spin) ++ *spin = s_spin; ++} - /* StrongARM has early execution of branches, so a sequence that is worth -@@ -1729,233 +1746,279 @@ const struct tune_params arm_fastmul_tune = - const struct tune_params arm_strongarm_tune = + /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */ + static void + define__GNUC__ (cpp_reader *pfile) { - arm_fastmul_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 3, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +- int major, minor, patchlevel; ++ int major, minor, patchlevel, linaro_release, linaro_spin; - const struct tune_params arm_xscale_tune = - { - arm_xscale_rtx_costs, -- NULL, -+ NULL, /* Insn extra costs. */ - xscale_sched_adjust_cost, -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 2, /* Constant limit. */ - 3, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. 
*/ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + parse_basever (&major, &minor, &patchlevel); ++ parse_linarover (&linaro_release, &linaro_spin); + cpp_define_formatted (pfile, "__GNUC__=%d", major); + cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); + cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); + cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); ++ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release); ++ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin); + cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); + cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); + cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); +--- a/src/gcc/cprop.c ++++ b/src/gcc/cprop.c +@@ -285,6 +285,15 @@ cprop_constant_p (const_rtx x) + return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x)); + } - const struct tune_params arm_9e_tune = - { - arm_9e_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF -+}; ++/* Determine whether the rtx X should be treated as a register that can ++ be propagated. Any pseudo-register is fine. */ + -+const struct tune_params arm_marvell_pj4_tune = ++static bool ++cprop_reg_p (const_rtx x) +{ -+ arm_9e_rtx_costs, -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ - arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ &arm_default_vec_cost, -+ 1, /* Constant limit. */ -+ 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; ++ return REG_P (x) && !HARD_REGISTER_P (x); ++} ++ + /* Scan SET present in INSN and add an entry to the hash TABLE. + IMPLICIT is true if it's an implicit set, false otherwise. */ + +@@ -295,8 +304,7 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, + rtx src = SET_SRC (set); + rtx dest = SET_DEST (set); + +- if (REG_P (dest) +- && ! 
HARD_REGISTER_P (dest) ++ if (cprop_reg_p (dest) + && reg_available_p (dest, insn) + && can_copy_p (GET_MODE (dest))) + { +@@ -321,9 +329,8 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, + src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src); + + /* Record sets for constant/copy propagation. */ +- if ((REG_P (src) ++ if ((cprop_reg_p (src) + && src != dest +- && ! HARD_REGISTER_P (src) + && reg_available_p (src, insn)) + || cprop_constant_p (src)) + insert_set_in_table (dest, src, insn, table, implicit); +@@ -758,12 +765,37 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) + int success = 0; + rtx set = single_set (insn); + ++ bool check_rtx_costs = true; ++ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); ++ int old_cost = set ? set_rtx_cost (set, speed) : 0; ++ ++ if ((note != 0 ++ && REG_NOTE_KIND (note) == REG_EQUAL ++ && (GET_CODE (XEXP (note, 0)) == CONST ++ || CONSTANT_P (XEXP (note, 0)))) ++ || (set && CONSTANT_P (SET_SRC (set)))) ++ check_rtx_costs = false; ++ + /* Usually we substitute easy stuff, so we won't copy everything. + We however need to take care to not duplicate non-trivial CONST + expressions. */ + to = copy_rtx (to); + + validate_replace_src_group (from, to, insn); ++ ++ /* If TO is a constant, check the cost of the set after propagation ++ to the cost of the set before the propagation. If the cost is ++ higher, then do not replace FROM with TO. */ ++ ++ if (check_rtx_costs ++ && CONSTANT_P (to) ++ && (set_rtx_cost (set, speed) > old_cost)) ++ { ++ cancel_changes (0); ++ return false; ++ } ++ ++ + if (num_changes_pending () && apply_change_group ()) + success = 1; + +@@ -821,15 +853,15 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) + return success; + } + +-/* Find a set of REGNOs that are available on entry to INSN's block. Return +- NULL no such set is found. */ ++/* Find a set of REGNOs that are available on entry to INSN's block. If found, ++ SET_RET[0] will be assigned a set with a register source and SET_RET[1] a ++ set with a constant source. If not found the corresponding entry is set to ++ NULL. */ + +-static struct cprop_expr * +-find_avail_set (int regno, rtx_insn *insn) ++static void ++find_avail_set (int regno, rtx_insn *insn, struct cprop_expr *set_ret[2]) + { +- /* SET1 contains the last set found that can be returned to the caller for +- use in a substitution. */ +- struct cprop_expr *set1 = 0; ++ set_ret[0] = set_ret[1] = NULL; + + /* Loops are not possible here. To get a loop we would need two sets + available at the start of the block containing INSN. i.e. we would +@@ -869,8 +901,10 @@ find_avail_set (int regno, rtx_insn *insn) + If the source operand changed, we may still use it for the next + iteration of this loop, but we may not use it for substitutions. */ + +- if (cprop_constant_p (src) || reg_not_set_p (src, insn)) +- set1 = set; ++ if (cprop_constant_p (src)) ++ set_ret[1] = set; ++ else if (reg_not_set_p (src, insn)) ++ set_ret[0] = set; + + /* If the source of the set is anything except a register, then + we have reached the end of the copy chain. */ +@@ -881,10 +915,6 @@ find_avail_set (int regno, rtx_insn *insn) + and see if we have an available copy into SRC. */ + regno = REGNO (src); + } +- +- /* SET1 holds the last set that was available and anticipatable at +- INSN. 
*/ +- return set1; + } + + /* Subroutine of cprop_insn that tries to propagate constants into +@@ -1050,40 +1080,40 @@ cprop_insn (rtx_insn *insn) + int changed = 0, changed_this_round; + rtx note; + +-retry: +- changed_this_round = 0; +- reg_use_count = 0; +- note_uses (&PATTERN (insn), find_used_regs, NULL); +- +- /* We may win even when propagating constants into notes. */ +- note = find_reg_equal_equiv_note (insn); +- if (note) +- find_used_regs (&XEXP (note, 0), NULL); +- +- for (i = 0; i < reg_use_count; i++) ++ do + { +- rtx reg_used = reg_use_table[i]; +- unsigned int regno = REGNO (reg_used); +- rtx src; +- struct cprop_expr *set; ++ changed_this_round = 0; ++ reg_use_count = 0; ++ note_uses (&PATTERN (insn), find_used_regs, NULL); + +- /* If the register has already been set in this block, there's +- nothing we can do. */ +- if (! reg_not_set_p (reg_used, insn)) +- continue; ++ /* We may win even when propagating constants into notes. */ ++ note = find_reg_equal_equiv_note (insn); ++ if (note) ++ find_used_regs (&XEXP (note, 0), NULL); - const struct tune_params arm_v6t2_tune = - { - arm_9e_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +- /* Find an assignment that sets reg_used and is available +- at the start of the block. */ +- set = find_avail_set (regno, insn); +- if (! set) +- continue; ++ for (i = 0; i < reg_use_count; i++) ++ { ++ rtx reg_used = reg_use_table[i]; ++ unsigned int regno = REGNO (reg_used); ++ rtx src_cst = NULL, src_reg = NULL; ++ struct cprop_expr *set[2]; + +- src = set->src; ++ /* If the register has already been set in this block, there's ++ nothing we can do. */ ++ if (! reg_not_set_p (reg_used, insn)) ++ continue; +- /* Constant propagation. */ +- if (cprop_constant_p (src)) +- { +- if (constprop_register (reg_used, src, insn)) ++ /* Find an assignment that sets reg_used and is available ++ at the start of the block. */ ++ find_avail_set (regno, insn, set); ++ if (set[0]) ++ src_reg = set[0]->src; ++ if (set[1]) ++ src_cst = set[1]->src; + - /* Generic Cortex tuning. Use more specific tunings if appropriate. */ - const struct tune_params arm_cortex_tune = - { - arm_9e_rtx_costs, - &generic_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. 
*/ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; ++ /* Constant propagation. */ ++ if (src_cst && cprop_constant_p (src_cst) ++ && constprop_register (reg_used, src_cst, insn)) + { + changed_this_round = changed = 1; + global_const_prop_count++; +@@ -1093,18 +1123,16 @@ retry: + "GLOBAL CONST-PROP: Replacing reg %d in ", regno); + fprintf (dump_file, "insn %d with constant ", + INSN_UID (insn)); +- print_rtl (dump_file, src); ++ print_rtl (dump_file, src_cst); + fprintf (dump_file, "\n"); + } + if (insn->deleted ()) + return 1; + } +- } +- else if (REG_P (src) +- && REGNO (src) >= FIRST_PSEUDO_REGISTER +- && REGNO (src) != regno) +- { +- if (try_replace_reg (reg_used, src, insn)) ++ /* Copy propagation. */ ++ else if (src_reg && cprop_reg_p (src_reg) ++ && REGNO (src_reg) != regno ++ && try_replace_reg (reg_used, src_reg, insn)) + { + changed_this_round = changed = 1; + global_copy_prop_count++; +@@ -1113,7 +1141,7 @@ retry: + fprintf (dump_file, + "GLOBAL COPY-PROP: Replacing reg %d in insn %d", + regno, INSN_UID (insn)); +- fprintf (dump_file, " with reg %d\n", REGNO (src)); ++ fprintf (dump_file, " with reg %d\n", REGNO (src_reg)); + } - const struct tune_params arm_cortex_a8_tune = - { - arm_9e_rtx_costs, - &cortexa8_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + /* The original insn setting reg_used may or may not now be +@@ -1123,12 +1151,10 @@ retry: + and made things worse. */ + } + } +- +- /* If try_replace_reg simplified the insn, the regs found +- by find_used_regs may not be valid anymore. Start over. 
*/ +- if (changed_this_round) +- goto retry; + } ++ /* If try_replace_reg simplified the insn, the regs found by find_used_regs ++ may not be valid anymore. Start over. */ ++ while (changed_this_round); - const struct tune_params arm_cortex_a7_tune = - { - arm_9e_rtx_costs, - &cortexa7_extra_costs, -- NULL, -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + if (changed && DEBUG_INSN_P (insn)) + return 0; +@@ -1191,7 +1217,7 @@ do_local_cprop (rtx x, rtx_insn *insn) + /* Rule out USE instructions and ASM statements as we don't want to + change the hard registers mentioned. */ + if (REG_P (x) +- && (REGNO (x) >= FIRST_PSEUDO_REGISTER ++ && (cprop_reg_p (x) + || (GET_CODE (PATTERN (insn)) != USE + && asm_noperands (PATTERN (insn)) < 0))) + { +@@ -1207,7 +1233,7 @@ do_local_cprop (rtx x, rtx_insn *insn) - const struct tune_params arm_cortex_a15_tune = - { - arm_9e_rtx_costs, - &cortexa15_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 3, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_FULL - }; + if (cprop_constant_p (this_rtx)) + newcnst = this_rtx; +- if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER ++ if (cprop_reg_p (this_rtx) + /* Don't copy propagate if it has attached REG_EQUIV note. 
+ At this point this only function parameters should have + REG_EQUIV notes and if the argument slot is used somewhere +@@ -1328,9 +1354,8 @@ implicit_set_cond_p (const_rtx cond) + if (GET_CODE (cond) != EQ && GET_CODE (cond) != NE) + return false; - const struct tune_params arm_cortex_a53_tune = - { - arm_9e_rtx_costs, - &cortexa53_extra_costs, -- NULL, /* Scheduler cost adjustment. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_OFF - }; +- /* The first operand of COND must be a pseudo-reg. */ +- if (! REG_P (XEXP (cond, 0)) +- || HARD_REGISTER_P (XEXP (cond, 0))) ++ /* The first operand of COND must be a register we can propagate. */ ++ if (!cprop_reg_p (XEXP (cond, 0))) + return false; - const struct tune_params arm_cortex_a57_tune = - { - arm_9e_rtx_costs, - &cortexa57_extra_costs, -- NULL, /* Scheduler cost adjustment. */ -- 1, /* Constant limit. */ -- 2, /* Max cond insns. */ -- ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -+ NULL, /* Sched adj cost. */ - arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ -+ &arm_default_vec_cost, -+ 1, /* Constant limit. */ -+ 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 3, /* Issue rate. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_FULL - }; + /* The second operand of COND must be a suitable constant. */ +--- a/src/gcc/cse.c ++++ b/src/gcc/cse.c +@@ -4540,14 +4540,49 @@ cse_insn (rtx_insn *insn) + canonicalize_insn (insn, &sets, n_sets); + + /* If this insn has a REG_EQUAL note, store the equivalent value in SRC_EQV, +- if different, or if the DEST is a STRICT_LOW_PART. The latter condition +- is necessary because SRC_EQV is handled specially for this case, and if +- it isn't set, then there will be no equivalence for the destination. 
*/ ++ if different, or if the DEST is a STRICT_LOW_PART/ZERO_EXTRACT. The ++ latter condition is necessary because SRC_EQV is handled specially for ++ this case, and if it isn't set, then there will be no equivalence ++ for the destination. */ + if (n_sets == 1 && REG_NOTES (insn) != 0 + && (tem = find_reg_note (insn, REG_EQUAL, NULL_RTX)) != 0 +- && (! rtx_equal_p (XEXP (tem, 0), SET_SRC (sets[0].rtl)) +- || GET_CODE (SET_DEST (sets[0].rtl)) == STRICT_LOW_PART)) +- src_eqv = copy_rtx (XEXP (tem, 0)); ++ && (! rtx_equal_p (XEXP (tem, 0), SET_SRC (sets[0].rtl)))) ++ { ++ if (GET_CODE (SET_DEST (sets[0].rtl)) == STRICT_LOW_PART) ++ src_eqv = copy_rtx (XEXP (tem, 0)); ++ ++ /* If DEST is of the form ZERO_EXTACT, as in: ++ (set (zero_extract:SI (reg:SI 119) ++ (const_int 16 [0x10]) ++ (const_int 16 [0x10])) ++ (const_int 51154 [0xc7d2])) ++ REG_EQUAL note will specify the value of register (reg:SI 119) at this ++ point. Note that this is different from SRC_EQV. We can however ++ calculate SRC_EQV with the position and width of ZERO_EXTRACT. */ ++ else if (GET_CODE (SET_DEST (sets[0].rtl)) == ZERO_EXTRACT ++ && CONST_INT_P (XEXP (tem, 0)) ++ && CONST_INT_P (XEXP (SET_DEST (sets[0].rtl), 1)) ++ && CONST_INT_P (XEXP (SET_DEST (sets[0].rtl), 2))) ++ { ++ rtx dest_reg = XEXP (SET_DEST (sets[0].rtl), 0); ++ rtx width = XEXP (SET_DEST (sets[0].rtl), 1); ++ rtx pos = XEXP (SET_DEST (sets[0].rtl), 2); ++ HOST_WIDE_INT val = INTVAL (XEXP (tem, 0)); ++ HOST_WIDE_INT mask; ++ unsigned int shift; ++ if (BITS_BIG_ENDIAN) ++ shift = GET_MODE_PRECISION (GET_MODE (dest_reg)) ++ - INTVAL (pos) - INTVAL (width); ++ else ++ shift = INTVAL (pos); ++ if (INTVAL (width) == HOST_BITS_PER_WIDE_INT) ++ mask = ~(HOST_WIDE_INT) 0; ++ else ++ mask = ((HOST_WIDE_INT) 1 << INTVAL (width)) - 1; ++ val = (val >> shift) & mask; ++ src_eqv = GEN_INT (val); ++ } ++ } - const struct tune_params arm_xgene1_tune = + /* Set sets[i].src_elt to the class each source belongs to. + Detect assignments from or to volatile things +--- a/src/gcc/df-core.c ++++ b/src/gcc/df-core.c +@@ -642,7 +642,6 @@ void + df_finish_pass (bool verify ATTRIBUTE_UNUSED) { - arm_9e_rtx_costs, - &xgene1_extra_costs, -- NULL, /* Scheduler cost adjustment. */ -- 1, /* Constant limit. */ -- 2, /* Max cond insns. */ -- ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -+ NULL, /* Sched adj cost. */ - arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 32, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ &arm_default_vec_cost, -+ 1, /* Constant limit. */ -+ 2, /* Max cond insns. */ -+ 32, /* Memset max inline. */ -+ 4, /* Issue rate. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. 
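For the cse.c hunk that derives SRC_EQV when the SET_DEST is a ZERO_EXTRACT: the value recorded for the field is just the REG_EQUAL value shifted to the field's position and masked to its width. A standalone sketch of that arithmetic with the numbers from the comment (little-endian bit numbering, i.e. the !BITS_BIG_ENDIAN branch; illustrative only, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* Value of the WIDTH-bit field starting at bit POS of VAL.  */
    static uint32_t
    field_value (uint32_t val, unsigned int pos, unsigned int width)
    {
      uint32_t mask = width >= 32 ? ~UINT32_C (0)
                                  : (UINT32_C (1) << width) - 1;
      return (val >> pos) & mask;
    }

    int
    main (void)
    {
      /* A 16-bit field at bit 16 written with 51154 (0xc7d2): if the whole
         register is known to equal 0xc7d20000, the field equals 0xc7d2.  */
      printf ("0x%x\n", (unsigned int) field_value (0xc7d20000u, 16, 16));
      return 0;
    }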
*/ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + int i; +- int removed = 0; - /* Branches can be dual-issued on Cortex-A5, so conditional execution is -@@ -1965,21 +2028,23 @@ const struct tune_params arm_cortex_a5_tune = - { - arm_9e_rtx_costs, - &cortexa5_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_cortex_a5_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 1, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_cortex_a5_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {false, false}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + #ifdef ENABLE_DF_CHECKING + int saved_flags; +@@ -658,21 +657,15 @@ df_finish_pass (bool verify ATTRIBUTE_UNUSED) + saved_flags = df->changeable_flags; + #endif - const struct tune_params arm_cortex_a9_tune = -@@ -1987,41 +2052,45 @@ const struct tune_params arm_cortex_a9_tune = - arm_9e_rtx_costs, - &cortexa9_extra_costs, - cortex_a9_sched_adjust_cost, -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_BENEFICIAL(4,32,32), -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; +- for (i = 0; i < df->num_problems_defined; i++) ++ /* We iterate over problems by index as each problem removed will ++ lead to problems_in_order to be reordered. */ ++ for (i = 0; i < DF_LAST_PROBLEM_PLUS1; i++) + { +- struct dataflow *dflow = df->problems_in_order[i]; +- struct df_problem *problem = dflow->problem; ++ struct dataflow *dflow = df->problems_by_index[i]; - const struct tune_params arm_cortex_a12_tune = - { - arm_9e_rtx_costs, - &cortexa12_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, /* Vectorizer costs. 
*/ - 1, /* Constant limit. */ - 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- true, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- true, true, /* Prefer 32-bit encodings. */ -- true, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_OFF - }; +- if (dflow->optional_p) +- { +- gcc_assert (problem->remove_problem_fun); +- (problem->remove_problem_fun) (); +- df->problems_in_order[i] = NULL; +- df->problems_by_index[problem->id] = NULL; +- removed++; +- } ++ if (dflow && dflow->optional_p) ++ df_remove_problem (dflow); + } +- df->num_problems_defined -= removed; + + /* Clear all of the flags. */ + df->changeable_flags = 0; +--- a/src/gcc/emit-rtl.c ++++ b/src/gcc/emit-rtl.c +@@ -5234,7 +5234,8 @@ set_for_reg_notes (rtx insn) + reg = SET_DEST (pat); + + /* Notes apply to the contents of a STRICT_LOW_PART. */ +- if (GET_CODE (reg) == STRICT_LOW_PART) ++ if (GET_CODE (reg) == STRICT_LOW_PART ++ || GET_CODE (reg) == ZERO_EXTRACT) + reg = XEXP (reg, 0); + + /* Check that we have a register. */ +--- a/src/gcc/expr.c ++++ b/src/gcc/expr.c +@@ -7725,15 +7725,7 @@ expand_expr_addr_expr_1 (tree exp, rtx target, machine_mode tmode, + marked TREE_ADDRESSABLE, which will be either a front-end + or a tree optimizer bug. */ + +- if (TREE_ADDRESSABLE (exp) +- && ! MEM_P (result) +- && ! targetm.calls.allocate_stack_slots_for_args ()) +- { +- error ("local frame unavailable (naked function?)"); +- return result; +- } +- else +- gcc_assert (MEM_P (result)); ++ gcc_assert (MEM_P (result)); + result = XEXP (result, 0); + + /* ??? Is this needed anymore? */ +--- a/src/gcc/fortran/Make-lang.in ++++ b/src/gcc/fortran/Make-lang.in +@@ -167,7 +167,7 @@ check-f95-subtargets : check-gfortran-subtargets + check-fortran-subtargets : check-gfortran-subtargets + lang_checks += check-gfortran + lang_checks_parallelized += check-gfortran +-# For description see comment above check_gcc_parallelize in gcc/Makefile.in. ++# For description see the check_$lang_parallelize comment in gcc/Makefile.in. + check_gfortran_parallelize = 10000 - /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single -@@ -2035,21 +2104,23 @@ const struct tune_params arm_v7m_tune = - { - arm_9e_rtx_costs, - &v7m_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_cortex_m_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_cortex_m_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {false, false}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. 
*/ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + # GFORTRAN documentation. +@@ -275,7 +275,7 @@ fortran.uninstall: + # We just have to delete files specific to us. - /* Cortex-M7 tuning. */ -@@ -2058,21 +2129,23 @@ const struct tune_params arm_cortex_m7_tune = - { - arm_9e_rtx_costs, - &v7m_extra_costs, -- NULL, /* Sched adj cost. */ -+ NULL, /* Sched adj cost. */ -+ arm_cortex_m7_branch_cost, -+ &arm_default_vec_cost, - 0, /* Constant limit. */ - 1, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_cortex_m7_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + fortran.mostlyclean: +- -rm -f f951$(exeext) ++ -rm -f gfortran$(exeext) gfortran-cross$(exeext) f951$(exeext) + -rm -f fortran/*.o - /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than -@@ -2080,43 +2153,47 @@ const struct tune_params arm_cortex_m7_tune = - const struct tune_params arm_v6m_tune = + fortran.clean: +--- a/src/gcc/function.c ++++ b/src/gcc/function.c +@@ -227,6 +227,7 @@ free_after_compilation (struct function *f) + f->eh = NULL; + f->machine = NULL; + f->cfg = NULL; ++ f->curr_properties &= ~PROP_cfg; + + regno_reg_rtx = NULL; + } +@@ -2121,9 +2122,6 @@ aggregate_value_p (const_tree exp, const_tree fntype) + bool + use_register_for_decl (const_tree decl) { - arm_9e_rtx_costs, -- NULL, -- NULL, /* Sched adj cost. */ -+ NULL, /* Insn extra costs. */ -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, /* Vectorizer costs. */ - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 1, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- false, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {false, false}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. */ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. 
*/ -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF +- if (!targetm.calls.allocate_stack_slots_for_args ()) +- return true; +- + /* Honor volatile. */ + if (TREE_SIDE_EFFECTS (decl)) + return false; +@@ -2151,6 +2149,9 @@ use_register_for_decl (const_tree decl) + if (flag_float_store && FLOAT_TYPE_P (TREE_TYPE (decl))) + return false; + ++ if (!targetm.calls.allocate_stack_slots_for_args ()) ++ return true; ++ + /* If we're not interested in tracking debugging information for + this decl, then we can certainly put it in a register. */ + if (DECL_IGNORED_P (decl)) +--- a/src/gcc/genmatch.c ++++ b/src/gcc/genmatch.c +@@ -2922,7 +2922,12 @@ parser::parse_operation () + + user_id *p = dyn_cast (op); + if (p && p->is_oper_list) +- record_operlist (id_tok->src_loc, p); ++ { ++ if (active_fors.length() == 0) ++ record_operlist (id_tok->src_loc, p); ++ else ++ fatal_at (id_tok, "operator-list %s cannot be exapnded inside 'for'", id); ++ } + return op; + } + +@@ -3338,8 +3343,13 @@ parser::parse_for (source_location) + "others with arity %d", oper, idb->nargs, arity); + + user_id *p = dyn_cast (idb); +- if (p && p->is_oper_list) +- op->substitutes.safe_splice (p->substitutes); ++ if (p) ++ { ++ if (p->is_oper_list) ++ op->substitutes.safe_splice (p->substitutes); ++ else ++ fatal_at (token, "iterator cannot be used as operator-list"); ++ } + else + op->substitutes.safe_push (idb); + } +@@ -3436,6 +3446,11 @@ parser::parse_operator_list (source_location) + op->substitutes.safe_push (idb); + } + ++ // Check that there is no junk after id-list ++ token = peek(); ++ if (token->type != CPP_CLOSE_PAREN) ++ fatal_at (token, "expected identifier got %s", cpp_type2name (token->type, 0)); ++ + if (op->substitutes.length () == 0) + fatal_at (token, "operator-list cannot be empty"); + +--- a/src/gcc/genpreds.c ++++ b/src/gcc/genpreds.c +@@ -640,12 +640,14 @@ struct constraint_data + const char *regclass; /* for register constraints */ + rtx exp; /* for other constraints */ + unsigned int lineno; /* line of definition */ +- unsigned int is_register : 1; +- unsigned int is_const_int : 1; +- unsigned int is_const_dbl : 1; +- unsigned int is_extra : 1; +- unsigned int is_memory : 1; +- unsigned int is_address : 1; ++ unsigned int is_register : 1; ++ unsigned int is_const_int : 1; ++ unsigned int is_const_dbl : 1; ++ unsigned int is_extra : 1; ++ unsigned int is_memory : 1; ++ unsigned int is_address : 1; ++ unsigned int maybe_allows_reg : 1; ++ unsigned int maybe_allows_mem : 1; }; - const struct tune_params arm_fa726te_tune = - { - arm_9e_rtx_costs, -- NULL, -+ NULL, /* Insn extra costs. */ - fa726te_sched_adjust_cost, -+ arm_default_branch_cost, -+ &arm_default_vec_cost, - 1, /* Constant limit. */ - 5, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ - ARM_PREFETCH_NOT_BENEFICIAL, -- true, /* Prefer constant pool. */ -- arm_default_branch_cost, -- false, /* Prefer LDRD/STRD. */ -- {true, true}, /* Prefer non short circuit. */ -- &arm_default_vec_cost, /* Vectorizer costs. */ -- false, /* Prefer Neon for 64-bits bitops. */ -- false, false, /* Prefer 32-bit encodings. */ -- false, /* Prefer Neon for stringops. */ -- 8, /* Maximum insns to inline memset. 
*/ -- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ -- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ -+ tune_params::PREF_CONST_POOL_TRUE, -+ tune_params::PREF_LDRD_FALSE, -+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_NEITHER, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_FALSE, -+ tune_params::FUSE_NOTHING, -+ tune_params::SCHED_AUTOPREF_OFF - }; + /* Overview of all constraints beginning with a given letter. */ +@@ -691,6 +693,9 @@ static unsigned int satisfied_start; + static unsigned int const_int_start, const_int_end; + static unsigned int memory_start, memory_end; + static unsigned int address_start, address_end; ++static unsigned int maybe_allows_none_start, maybe_allows_none_end; ++static unsigned int maybe_allows_reg_start, maybe_allows_reg_end; ++static unsigned int maybe_allows_mem_start, maybe_allows_mem_end; + + /* Convert NAME, which contains angle brackets and/or underscores, to + a string that can be used as part of a C identifier. The string +@@ -711,6 +716,34 @@ mangle (const char *name) + return XOBFINISH (rtl_obstack, const char *); + } + ++/* Return a bitmask, bit 1 if EXP maybe allows a REG/SUBREG, 2 if EXP ++ maybe allows a MEM. Bits should be clear only when we are sure it ++ will not allow a REG/SUBREG or a MEM. */ ++static int ++compute_maybe_allows (rtx exp) ++{ ++ switch (GET_CODE (exp)) ++ { ++ case IF_THEN_ELSE: ++ /* Conservative answer is like IOR, of the THEN and ELSE branches. */ ++ return compute_maybe_allows (XEXP (exp, 1)) ++ | compute_maybe_allows (XEXP (exp, 2)); ++ case AND: ++ return compute_maybe_allows (XEXP (exp, 0)) ++ & compute_maybe_allows (XEXP (exp, 1)); ++ case IOR: ++ return compute_maybe_allows (XEXP (exp, 0)) ++ | compute_maybe_allows (XEXP (exp, 1)); ++ case MATCH_CODE: ++ if (*XSTR (exp, 1) == '\0') ++ return (strstr (XSTR (exp, 0), "reg") != NULL ? 1 : 0) ++ | (strstr (XSTR (exp, 0), "mem") != NULL ? 2 : 0); ++ /* FALLTHRU */ ++ default: ++ return 3; ++ } ++} ++ + /* Add one constraint, of any sort, to the tables. 
NAME is its name; + REGCLASS is the register class, if any; EXP is the expression to + test, if any; IS_MEMORY and IS_ADDRESS indicate memory and address +@@ -866,6 +899,11 @@ add_constraint (const char *name, const char *regclass, + c->is_extra = !(regclass || is_const_int || is_const_dbl); + c->is_memory = is_memory; + c->is_address = is_address; ++ int maybe_allows = 3; ++ if (exp) ++ maybe_allows = compute_maybe_allows (exp); ++ c->maybe_allows_reg = (maybe_allows & 1) != 0; ++ c->maybe_allows_mem = (maybe_allows & 2) != 0; + + c->next_this_letter = *slot; + *slot = c; +@@ -940,8 +978,30 @@ choose_enum_order (void) + enum_order[next++] = c; + address_end = next; + ++ maybe_allows_none_start = next; ++ FOR_ALL_CONSTRAINTS (c) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && !c->maybe_allows_reg && !c->maybe_allows_mem) ++ enum_order[next++] = c; ++ maybe_allows_none_end = next; ++ ++ maybe_allows_reg_start = next; ++ FOR_ALL_CONSTRAINTS (c) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && c->maybe_allows_reg && !c->maybe_allows_mem) ++ enum_order[next++] = c; ++ maybe_allows_reg_end = next; ++ ++ maybe_allows_mem_start = next; ++ FOR_ALL_CONSTRAINTS (c) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && !c->maybe_allows_reg && c->maybe_allows_mem) ++ enum_order[next++] = c; ++ maybe_allows_mem_end = next; ++ + FOR_ALL_CONSTRAINTS (c) +- if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address) ++ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address ++ && c->maybe_allows_reg && c->maybe_allows_mem) + enum_order[next++] = c; + gcc_assert (next == num_constraints); + } +@@ -1229,6 +1289,41 @@ write_range_function (const char *name, unsigned int start, unsigned int end) + "}\n\n", name); + } + ++/* Write a definition for insn_extra_constraint_allows_reg_mem function. */ ++static void ++write_allows_reg_mem_function (void) ++{ ++ printf ("static inline void\n" ++ "insn_extra_constraint_allows_reg_mem (enum constraint_num c,\n" ++ "\t\t\t\t bool *allows_reg, bool *allows_mem)\n" ++ "{\n"); ++ if (maybe_allows_none_start != maybe_allows_none_end) ++ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" ++ " return;\n", ++ enum_order[maybe_allows_none_start]->c_name, ++ enum_order[maybe_allows_none_end - 1]->c_name); ++ if (maybe_allows_reg_start != maybe_allows_reg_end) ++ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" ++ " {\n" ++ " *allows_reg = true;\n" ++ " return;\n" ++ " }\n", ++ enum_order[maybe_allows_reg_start]->c_name, ++ enum_order[maybe_allows_reg_end - 1]->c_name); ++ if (maybe_allows_mem_start != maybe_allows_mem_end) ++ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" ++ " {\n" ++ " *allows_mem = true;\n" ++ " return;\n" ++ " }\n", ++ enum_order[maybe_allows_mem_start]->c_name, ++ enum_order[maybe_allows_mem_end - 1]->c_name); ++ printf (" (void) c;\n" ++ " *allows_reg = true;\n" ++ " *allows_mem = true;\n" ++ "}\n\n"); ++} ++ + /* VEC is a list of key/value pairs, with the keys being lower bounds + of a range. Output a decision tree that handles the keys covered by + [VEC[START], VEC[END]), returning FALLBACK for keys lower then VEC[START]'s. 
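The genpreds.c addition classifies each extra constraint by whether its definition could ever accept a register or a memory operand, so the generated insn_extra_constraint_allows_reg_mem can answer that question without walking the constraint's RTL at run time. compute_maybe_allows does the classification by folding a reg/mem bitmask over the constraint expression; a self-contained caricature of that fold (illustrative only, toy data structure rather than RTL, and it omits the IF_THEN_ELSE case the real code handles):

    #include <string.h>

    enum kind { K_AND, K_IOR, K_MATCH_CODE, K_OTHER };

    struct cexpr
    {
      enum kind kind;
      const char *codes;             /* K_MATCH_CODE: e.g. "reg subreg"  */
      const struct cexpr *op0, *op1; /* K_AND / K_IOR operands           */
    };

    /* Bit 0: may allow a reg, bit 1: may allow a mem; 3 when unsure.  */
    static int
    maybe_allows (const struct cexpr *e)
    {
      switch (e->kind)
        {
        case K_AND:
          return maybe_allows (e->op0) & maybe_allows (e->op1);
        case K_IOR:
          return maybe_allows (e->op0) | maybe_allows (e->op1);
        case K_MATCH_CODE:
          return (strstr (e->codes, "reg") != NULL ? 1 : 0)
                 | (strstr (e->codes, "mem") != NULL ? 2 : 0);
        default:
          return 3;                  /* conservative: both possible */
        }
    }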
+@@ -1326,6 +1421,7 @@ write_tm_preds_h (void) + memory_start, memory_end); + write_range_function ("insn_extra_address_constraint", + address_start, address_end); ++ write_allows_reg_mem_function (); + + if (constraint_max_namelen > 1) + { +--- a/src/gcc/go/Make-lang.in ++++ b/src/gcc/go/Make-lang.in +@@ -197,6 +197,7 @@ go.uninstall: + go.mostlyclean: + -rm -f go/*$(objext) + -rm -f go/*$(coverageexts) ++ -rm -f gccgo$(exeext) gccgo-cross$(exeext) go1$(exeext) + go.clean: + go.distclean: + go.maintainer-clean: +--- a/src/gcc/ifcvt.c ++++ b/src/gcc/ifcvt.c +@@ -1046,7 +1046,7 @@ cc_in_cond (rtx cond) + + /* Return sequence of instructions generated by if conversion. This + function calls end_sequence() to end the current stream, ensures +- that are instructions are unshared, recognizable non-jump insns. ++ that the instructions are unshared, recognizable non-jump insns. + On failure, this function returns a NULL_RTX. */ + static rtx_insn * +--- a/src/gcc/ira-costs.c ++++ b/src/gcc/ira-costs.c +@@ -1380,8 +1380,6 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref) + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); -@@ -3140,31 +3217,33 @@ arm_option_override (void) - && abi_version_at_least(2)) - flag_strict_volatile_bitfields = 1; +- dest = SET_DEST (set); +- src = SET_SRC (set); + if (GET_CODE (dest) == SUBREG + && (GET_MODE_SIZE (GET_MODE (dest)) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))) +--- a/src/gcc/jit/Make-lang.in ++++ b/src/gcc/jit/Make-lang.in +@@ -285,6 +285,10 @@ jit.uninstall: + # We just have to delete files specific to us. -- /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed -- it beneficial (signified by setting num_prefetch_slots to 1 or more.) */ -+ /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we -+ have deemed it beneficial (signified by setting -+ prefetch.num_slots to 1 or more). */ - if (flag_prefetch_loop_arrays < 0 - && HAVE_prefetch - && optimize >= 3 -- && current_tune->num_prefetch_slots > 0) -+ && current_tune->prefetch.num_slots > 0) - flag_prefetch_loop_arrays = 1; + jit.mostlyclean: ++ -rm -f $(LIBGCCJIT_FILENAME) $(LIBGCCJIT_SYMLINK) ++ -rm -f $(LIBGCCJIT_LINKER_NAME_SYMLINK) $(FULL_DRIVER_NAME) ++ -rm -f $(LIBGCCJIT_SONAME) ++ -rm -f $(jit_OBJS) -- /* Set up parameters to be used in prefetching algorithm. Do not override the -- defaults unless we are tuning for a core we have researched values for. */ -- if (current_tune->num_prefetch_slots > 0) -+ /* Set up parameters to be used in prefetching algorithm. Do not -+ override the defaults unless we are tuning for a core we have -+ researched values for. 
*/ -+ if (current_tune->prefetch.num_slots > 0) - maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, -- current_tune->num_prefetch_slots, -- global_options.x_param_values, -- global_options_set.x_param_values); -- if (current_tune->l1_cache_line_size >= 0) -+ current_tune->prefetch.num_slots, -+ global_options.x_param_values, -+ global_options_set.x_param_values); -+ if (current_tune->prefetch.l1_cache_line_size >= 0) - maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, -- current_tune->l1_cache_line_size, -- global_options.x_param_values, -- global_options_set.x_param_values); -- if (current_tune->l1_cache_size >= 0) -+ current_tune->prefetch.l1_cache_line_size, -+ global_options.x_param_values, -+ global_options_set.x_param_values); -+ if (current_tune->prefetch.l1_cache_size >= 0) - maybe_set_param_value (PARAM_L1_CACHE_SIZE, -- current_tune->l1_cache_size, -- global_options.x_param_values, -- global_options_set.x_param_values); -+ current_tune->prefetch.l1_cache_size, -+ global_options.x_param_values, -+ global_options_set.x_param_values); + jit.clean: - /* Use Neon to perform 64-bits operations rather than core - registers. */ -@@ -3174,24 +3253,35 @@ arm_option_override (void) +--- a/src/gcc/loop-invariant.c ++++ b/src/gcc/loop-invariant.c +@@ -696,6 +696,8 @@ find_defs (struct loop *loop) + df_remove_problem (df_chain); + df_process_deferred_rescans (); + df_chain_add_problem (DF_UD_CHAIN); ++ df_live_add_problem (); ++ df_live_set_all_dirty (); + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_analyze_loop (loop); + check_invariant_table_size (); +@@ -740,8 +742,11 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on, + enough to not regress 410.bwaves either (by still moving reg+reg + invariants). + See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */ +- inv->cheap_address = address_cost (SET_SRC (set), word_mode, +- ADDR_SPACE_GENERIC, speed) < 3; ++ if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set)))) ++ inv->cheap_address = address_cost (SET_SRC (set), word_mode, ++ ADDR_SPACE_GENERIC, speed) < 3; ++ else ++ inv->cheap_address = false; + } + else + { +@@ -1174,6 +1179,7 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed, + } - /* Use the alternative scheduling-pressure algorithm by default. */ - maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, -- global_options.x_param_values, -- global_options_set.x_param_values); -+ global_options.x_param_values, -+ global_options_set.x_param_values); + if (!inv->cheap_address ++ || inv->def->n_uses == 0 + || inv->def->n_addr_uses < inv->def->n_uses) + (*comp_cost) += inv->cost * inv->eqno; - /* Look through ready list and all of queue for instructions - relevant for L2 auto-prefetcher. */ - int param_sched_autopref_queue_depth; -- if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF) -- param_sched_autopref_queue_depth = -1; -- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK) -- param_sched_autopref_queue_depth = 0; -- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL) -- param_sched_autopref_queue_depth = max_insn_queue_index + 1; -- else -- gcc_unreachable (); +@@ -1512,6 +1518,79 @@ replace_uses (struct invariant *inv, rtx reg, bool in_group) + return 1; + } + ++/* Whether invariant INV setting REG can be moved out of LOOP, at the end of ++ the block preceding its header. 
*/ + -+ switch (current_tune->sched_autopref) ++static bool ++can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg) ++{ ++ df_ref def, use; ++ unsigned int dest_regno, defs_in_loop_count = 0; ++ rtx_insn *insn = inv->insn; ++ basic_block bb = BLOCK_FOR_INSN (inv->insn); ++ ++ /* We ignore hard register and memory access for cost and complexity reasons. ++ Hard register are few at this stage and expensive to consider as they ++ require building a separate data flow. Memory access would require using ++ df_simulate_* and can_move_insns_across functions and is more complex. */ ++ if (!REG_P (reg) || HARD_REGISTER_P (reg)) ++ return false; ++ ++ /* Check whether the set is always executed. We could omit this condition if ++ we know that the register is unused outside of the loop, but it does not ++ seem worth finding out. */ ++ if (!inv->always_executed) ++ return false; ++ ++ /* Check that all uses that would be dominated by def are already dominated ++ by it. */ ++ dest_regno = REGNO (reg); ++ for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use)) + { -+ case tune_params::SCHED_AUTOPREF_OFF: -+ param_sched_autopref_queue_depth = -1; -+ break; ++ rtx_insn *use_insn; ++ basic_block use_bb; + -+ case tune_params::SCHED_AUTOPREF_RANK: -+ param_sched_autopref_queue_depth = 0; -+ break; ++ use_insn = DF_REF_INSN (use); ++ use_bb = BLOCK_FOR_INSN (use_insn); + -+ case tune_params::SCHED_AUTOPREF_FULL: -+ param_sched_autopref_queue_depth = max_insn_queue_index + 1; -+ break; ++ /* Ignore instruction considered for moving. */ ++ if (use_insn == insn) ++ continue; + -+ default: -+ gcc_unreachable (); ++ /* Don't consider uses outside loop. */ ++ if (!flow_bb_inside_loop_p (loop, use_bb)) ++ continue; ++ ++ /* Don't move if a use is not dominated by def in insn. */ ++ if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn)) ++ return false; ++ if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb)) ++ return false; ++ } ++ ++ /* Check for other defs. Any other def in the loop might reach a use ++ currently reached by the def in insn. */ ++ for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def)) ++ { ++ basic_block def_bb = DF_REF_BB (def); ++ ++ /* Defs in exit block cannot reach a use they weren't already. */ ++ if (single_succ_p (def_bb)) ++ { ++ basic_block def_bb_succ; ++ ++ def_bb_succ = single_succ (def_bb); ++ if (!flow_bb_inside_loop_p (loop, def_bb_succ)) ++ continue; ++ } ++ ++ if (++defs_in_loop_count > 1) ++ return false; + } + - maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, - param_sched_autopref_queue_depth, -- global_options.x_param_values, -- global_options_set.x_param_values); -+ global_options.x_param_values, -+ global_options_set.x_param_values); ++ return true; ++} ++ + /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false + otherwise. */ - /* Disable shrink-wrap when optimizing function for size, since it tends to - generate additional returns. */ -@@ -7946,236 +8036,6 @@ thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode) - return x; - } +@@ -1545,11 +1624,8 @@ move_invariant_reg (struct loop *loop, unsigned invno) + } + } --bool --arm_legitimize_reload_address (rtx *p, -- machine_mode mode, -- int opnum, int type, -- int ind_levels ATTRIBUTE_UNUSED) --{ -- /* We must recognize output that we have already generated ourselves. 
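The loop-invariant.c change lets an invariant set be moved to the loop preheader as-is when can_move_invariant_reg proves the destination is a pseudo whose single in-loop definition is always executed and dominates every in-loop use; only otherwise is a fresh temporary introduced. A source-level caricature of the easy case (illustrative only, not part of the patch; in practice earlier tree-level passes may already have hoisted code this simple):

    int
    sum_scaled (const int *a, int n, int b, int c)
    {
      int s = 0;
      for (int i = 0; i < n; i++)
        {
          int k = b * c + 1;   /* invariant, single def, dominates its use:
                                  movable to the preheader without a copy */
          s += a[i] * k;
        }
      return s;
    }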
*/ -- if (GET_CODE (*p) == PLUS -- && GET_CODE (XEXP (*p, 0)) == PLUS -- && REG_P (XEXP (XEXP (*p, 0), 0)) -- && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) -- && CONST_INT_P (XEXP (*p, 1))) -- { -- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, -- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), -- VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return true; -- } -- -- if (GET_CODE (*p) == PLUS -- && REG_P (XEXP (*p, 0)) -- && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) -- /* If the base register is equivalent to a constant, let the generic -- code handle it. Otherwise we will run into problems if a future -- reload pass decides to rematerialize the constant. */ -- && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0))) -- && CONST_INT_P (XEXP (*p, 1))) -- { -- HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); -- HOST_WIDE_INT low, high; -- -- /* Detect coprocessor load/stores. */ -- bool coproc_p = ((TARGET_HARD_FLOAT -- && TARGET_VFP -- && (mode == SFmode || mode == DFmode)) -- || (TARGET_REALLY_IWMMXT -- && VALID_IWMMXT_REG_MODE (mode)) -- || (TARGET_NEON -- && (VALID_NEON_DREG_MODE (mode) -- || VALID_NEON_QREG_MODE (mode)))); -- -- /* For some conditions, bail out when lower two bits are unaligned. */ -- if ((val & 0x3) != 0 -- /* Coprocessor load/store indexes are 8-bits + '00' appended. */ -- && (coproc_p -- /* For DI, and DF under soft-float: */ -- || ((mode == DImode || mode == DFmode) -- /* Without ldrd, we use stm/ldm, which does not -- fair well with unaligned bits. */ -- && (! TARGET_LDRD -- /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ -- || TARGET_THUMB2)))) -- return false; -- -- /* When breaking down a [reg+index] reload address into [(reg+high)+low], -- of which the (reg+high) gets turned into a reload add insn, -- we try to decompose the index into high/low values that can often -- also lead to better reload CSE. -- For example: -- ldr r0, [r2, #4100] // Offset too large -- ldr r1, [r2, #4104] // Offset too large -- -- is best reloaded as: -- add t1, r2, #4096 -- ldr r0, [t1, #4] -- add t2, r2, #4096 -- ldr r1, [t2, #8] -- -- which post-reload CSE can simplify in most cases to eliminate the -- second add instruction: -- add t1, r2, #4096 -- ldr r0, [t1, #4] -- ldr r1, [t1, #8] -- -- The idea here is that we want to split out the bits of the constant -- as a mask, rather than as subtracting the maximum offset that the -- respective type of load/store used can handle. -- -- When encountering negative offsets, we can still utilize it even if -- the overall offset is positive; sometimes this may lead to an immediate -- that can be constructed with fewer instructions. -- For example: -- ldr r0, [r2, #0x3FFFFC] -- -- This is best reloaded as: -- add t1, r2, #0x400000 -- ldr r0, [t1, #-4] -- -- The trick for spotting this for a load insn with N bits of offset -- (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a -- negative offset that is going to make bit N and all the bits below -- it become zero in the remainder part. -- -- The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect -- to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), -- used in most cases of ARM load/store instructions. */ -- --#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ -- (((VAL) & ((1 << (N)) - 1)) \ -- ? 
(((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ -- : 0) -- -- if (coproc_p) -- { -- low = SIGN_MAG_LOW_ADDR_BITS (val, 10); -- -- /* NEON quad-word load/stores are made of two double-word accesses, -- so the valid index range is reduced by 8. Treat as 9-bit range if -- we go over it. */ -- if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) -- low = SIGN_MAG_LOW_ADDR_BITS (val, 9); -- } -- else if (GET_MODE_SIZE (mode) == 8) -- { -- if (TARGET_LDRD) -- low = (TARGET_THUMB2 -- ? SIGN_MAG_LOW_ADDR_BITS (val, 10) -- : SIGN_MAG_LOW_ADDR_BITS (val, 8)); -- else -- /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) -- to access doublewords. The supported load/store offsets are -- -8, -4, and 4, which we try to produce here. */ -- low = ((val & 0xf) ^ 0x8) - 0x8; -- } -- else if (GET_MODE_SIZE (mode) < 8) -- { -- /* NEON element load/stores do not have an offset. */ -- if (TARGET_NEON_FP16 && mode == HFmode) -- return false; -- -- if (TARGET_THUMB2) -- { -- /* Thumb-2 has an asymmetrical index range of (-256,4096). -- Try the wider 12-bit range first, and re-try if the result -- is out of range. */ -- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); -- if (low < -255) -- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); -- } -- else -- { -- if (mode == HImode || mode == HFmode) -- { -- if (arm_arch4) -- low = SIGN_MAG_LOW_ADDR_BITS (val, 8); -- else -- { -- /* The storehi/movhi_bytes fallbacks can use only -- [-4094,+4094] of the full ldrb/strb index range. */ -- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); -- if (low == 4095 || low == -4095) -- return false; -- } -- } -- else -- low = SIGN_MAG_LOW_ADDR_BITS (val, 12); -- } -- } -- else -- return false; -- -- high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) -- ^ (unsigned HOST_WIDE_INT) 0x80000000) -- - (unsigned HOST_WIDE_INT) 0x80000000); -- /* Check for overflow or zero */ -- if (low == 0 || high == 0 || (high + low != val)) -- return false; -- -- /* Reload the high part into a base reg; leave the low part -- in the mem. -- Note that replacing this gen_rtx_PLUS with plus_constant is -- wrong in this case because we rely on the -- (plus (plus reg c1) c2) structure being preserved so that -- XEXP (*p, 0) in push_reload below uses the correct term. */ -- *p = gen_rtx_PLUS (GET_MODE (*p), -- gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), -- GEN_INT (high)), -- GEN_INT (low)); -- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, -- MODE_BASE_REG_CLASS (mode), GET_MODE (*p), -- VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return true; -- } -- -- return false; --} -- --rtx --thumb_legitimize_reload_address (rtx *x_p, -- machine_mode mode, -- int opnum, int type, -- int ind_levels ATTRIBUTE_UNUSED) --{ -- rtx x = *x_p; -- -- if (GET_CODE (x) == PLUS -- && GET_MODE_SIZE (mode) < 4 -- && REG_P (XEXP (x, 0)) -- && XEXP (x, 0) == stack_pointer_rtx -- && CONST_INT_P (XEXP (x, 1)) -- && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) -- { -- rtx orig_x = x; -- -- x = copy_rtx (x); -- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), -- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return x; -- } -- -- /* If both registers are hi-regs, then it's better to reload the -- entire expression rather than each register individually. That -- only requires one reload register rather than two. 
*/ -- if (GET_CODE (x) == PLUS -- && REG_P (XEXP (x, 0)) -- && REG_P (XEXP (x, 1)) -- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) -- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) -- { -- rtx orig_x = x; -- -- x = copy_rtx (x); -- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), -- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); -- return x; -- } -- -- return NULL; --} -- - /* Return TRUE if X contains any TLS symbol references. */ +- /* Move the set out of the loop. If the set is always executed (we could +- omit this condition if we know that the register is unused outside of +- the loop, but it does not seem worth finding out) and it has no uses +- that would not be dominated by it, we may just move it (TODO). +- Otherwise we need to create a temporary register. */ ++ /* If possible, just move the set out of the loop. Otherwise, we ++ need to create a temporary register. */ + set = single_set (inv->insn); + reg = dest = SET_DEST (set); + if (GET_CODE (reg) == SUBREG) +@@ -1557,20 +1633,27 @@ move_invariant_reg (struct loop *loop, unsigned invno) + if (REG_P (reg)) + regno = REGNO (reg); + +- reg = gen_reg_rtx_and_attrs (dest); ++ if (!can_move_invariant_reg (loop, inv, dest)) ++ { ++ reg = gen_reg_rtx_and_attrs (dest); - bool -@@ -9399,7 +9259,8 @@ static bool - arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) - { - const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; -- gcc_assert (GET_CODE (x) == UNSPEC); -+ rtx_code code = GET_CODE (x); -+ gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE); +- /* Try replacing the destination by a new pseudoregister. */ +- validate_change (inv->insn, &SET_DEST (set), reg, true); ++ /* Try replacing the destination by a new pseudoregister. */ ++ validate_change (inv->insn, &SET_DEST (set), reg, true); - switch (XINT (x, 1)) - { -@@ -9445,7 +9306,7 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) - *cost = COSTS_N_INSNS (2); - break; - } -- return false; -+ return true; - } +- /* As well as all the dominated uses. */ +- replace_uses (inv, reg, true); ++ /* As well as all the dominated uses. */ ++ replace_uses (inv, reg, true); - /* Cost of a libcall. We assume one insn per argument, an amount for the -@@ -11008,6 +10869,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, - *cost = LIBCALL_COST (1); - return false; +- /* And validate all the changes. */ +- if (!apply_change_group ()) +- goto fail; ++ /* And validate all the changes. */ ++ if (!apply_change_group ()) ++ goto fail; -+ case UNSPEC_VOLATILE: - case UNSPEC: - return arm_unspec_cost (x, outer_code, speed_p, cost); +- emit_insn_after (gen_move_insn (dest, reg), inv->insn); ++ emit_insn_after (gen_move_insn (dest, reg), inv->insn); ++ } ++ else if (dump_file) ++ fprintf (dump_file, "Invariant %d moved without introducing a new " ++ "temporary register\n", invno); + reorder_insns (inv->insn, inv->insn, BB_END (preheader)); ++ df_recompute_luids (preheader); -@@ -17287,14 +17149,16 @@ thumb2_reorg (void) + /* If there is a REG_EQUAL note on the insn we just moved, and the + insn is in a basic block that is not always executed or the note +--- a/src/gcc/lra-constraints.c ++++ b/src/gcc/lra-constraints.c +@@ -1656,8 +1656,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, + { + HARD_REG_SET temp; + +- // ??? 
Is this assert right +- // lra_assert (hard_reg_set_subset_p (set, reg_class_contents[rclass])); ++ lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set)); + COPY_HARD_REG_SET (temp, set); + AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs); + return (hard_reg_set_subset_p +--- a/src/gcc/objc/Make-lang.in ++++ b/src/gcc/objc/Make-lang.in +@@ -114,6 +114,7 @@ objc.uninstall: + objc.mostlyclean: + -rm -f objc/*$(objext) objc/xforward objc/fflags + -rm -f objc/*$(coverageexts) ++ -rm -f cc1obj$(exeext) + objc.clean: objc.mostlyclean + -rm -rf objc-headers + objc.distclean: +--- a/src/gcc/objcp/Make-lang.in ++++ b/src/gcc/objcp/Make-lang.in +@@ -142,6 +142,7 @@ obj-c++.uninstall: + obj-c++.mostlyclean: + -rm -f objcp/*$(objext) + -rm -f objcp/*$(coverageexts) ++ -rm -f cc1objplus$(exeext) + obj-c++.clean: obj-c++.mostlyclean + obj-c++.distclean: + -rm -f objcp/config.status objcp/Makefile +--- a/src/gcc/optabs.c ++++ b/src/gcc/optabs.c +@@ -6544,18 +6544,28 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1, + { + struct expand_operand ops[2]; + rtx rtx_op0, rtx_op1; ++ machine_mode m0, m1; + enum rtx_code rcode = get_rtx_code (tcode, unsignedp); - FOR_EACH_BB_FN (bb, cfun) - { -- if (current_tune->disparage_flag_setting_t16_encodings -+ if ((current_tune->disparage_flag_setting_t16_encodings -+ == tune_params::DISPARAGE_FLAGS_ALL) - && optimize_bb_for_speed_p (bb)) - continue; + gcc_assert (TREE_CODE_CLASS (tcode) == tcc_comparison); - rtx_insn *insn; - Convert_Action action = SKIP; - Convert_Action action_for_partial_flag_setting -- = (current_tune->disparage_partial_flag_setting_t16_encodings -+ = ((current_tune->disparage_flag_setting_t16_encodings -+ != tune_params::DISPARAGE_FLAGS_NEITHER) - && optimize_bb_for_speed_p (bb)) - ? SKIP : CONV; +- /* Expand operands. */ ++ /* Expand operands. For vector types with scalar modes, e.g. where int64x1_t ++ has mode DImode, this can produce a constant RTX of mode VOIDmode; in such ++ cases, use the original mode. 
*/ + rtx_op0 = expand_expr (t_op0, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op0)), + EXPAND_STACK_PARM); ++ m0 = GET_MODE (rtx_op0); ++ if (m0 == VOIDmode) ++ m0 = TYPE_MODE (TREE_TYPE (t_op0)); ++ + rtx_op1 = expand_expr (t_op1, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op1)), + EXPAND_STACK_PARM); ++ m1 = GET_MODE (rtx_op1); ++ if (m1 == VOIDmode) ++ m1 = TYPE_MODE (TREE_TYPE (t_op1)); -@@ -25660,12 +25524,12 @@ arm_print_tune_info (void) - current_tune->constant_limit); - asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n", - current_tune->max_insns_skipped); -- asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n", -- current_tune->num_prefetch_slots); -- asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n", -- current_tune->l1_cache_size); -- asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n", -- current_tune->l1_cache_line_size); -+ asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n", -+ current_tune->prefetch.num_slots); -+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n", -+ current_tune->prefetch.l1_cache_size); -+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n", -+ current_tune->prefetch.l1_cache_line_size); - asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n", - (int) current_tune->prefer_constant_pool); - asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n"); -@@ -25681,17 +25545,13 @@ arm_print_tune_info (void) - asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n", - (int) current_tune->prefer_ldrd_strd); - asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n", -- (int) current_tune->logical_op_non_short_circuit[0], -- (int) current_tune->logical_op_non_short_circuit[1]); -+ (int) current_tune->logical_op_non_short_circuit_thumb, -+ (int) current_tune->logical_op_non_short_circuit_arm); - asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n", - (int) current_tune->prefer_neon_for_64bits); - asm_fprintf (asm_out_file, - "\t\t@disparage_flag_setting_t16_encodings:\t%d\n", - (int) current_tune->disparage_flag_setting_t16_encodings); -- asm_fprintf (asm_out_file, -- "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n", -- (int) current_tune -- ->disparage_partial_flag_setting_t16_encodings); - asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n", - (int) current_tune->string_ops_prefer_neon); - asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n", -@@ -27213,40 +27073,12 @@ thumb2_output_casesi (rtx *operands) +- create_input_operand (&ops[0], rtx_op0, GET_MODE (rtx_op0)); +- create_input_operand (&ops[1], rtx_op1, GET_MODE (rtx_op1)); ++ create_input_operand (&ops[0], rtx_op0, m0); ++ create_input_operand (&ops[1], rtx_op1, m1); + if (!maybe_legitimize_operands (icode, 4, 2, ops)) + gcc_unreachable (); + return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value); +--- a/src/gcc/params.def ++++ b/src/gcc/params.def +@@ -262,6 +262,14 @@ DEFPARAM(PARAM_MAX_HOIST_DEPTH, + "Maximum depth of search in the dominator tree for expressions to hoist", + 30, 0, 0) + ++ ++/* When synthesizing expnonentiation by a real constant operations using square ++ roots, this controls how deep sqrt chains we are willing to generate. */ ++DEFPARAM(PARAM_MAX_POW_SQRT_DEPTH, ++ "max-pow-sqrt-depth", ++ "Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant", ++ 5, 1, 32) ++ + /* This parameter limits the number of insns in a loop that will be unrolled, + and by how much the loop is unrolled. 
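As an aside on the new --param max-pow-sqrt-depth above (an illustrative
sketch, not part of the patch): the parameter bounds how long a chain of
square roots the pow-synthesis code may build when expanding __builtin_pow
with a constant real exponent under fast-math style flags.  An exponent of
0.75 only needs a chain of depth 2, since 0.75 = 1/2 + 1/4; the function
name below is made up for illustration.

    #include <math.h>

    /* Sketch of the expansion shape for __builtin_pow (x, 0.75):
       two chained square roots, then one multiply.  */
    double
    pow_three_quarters (double x)
    {
      double s = sqrt (x);   /* x ** 0.5  */
      double q = sqrt (s);   /* x ** 0.25 */
      return s * q;          /* x ** 0.75 */
    }

The gcc.dg/pow-sqrt-*.c tests added by this patch exercise the expansion
with exponents such as -5.875 and 1.25 and compare the result against a
real __builtin_pow call.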
+ +--- a/src/gcc/regrename.c ++++ b/src/gcc/regrename.c +@@ -505,12 +505,20 @@ rename_chains (void) + continue; + } + +- if (dump_file) +- fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); +- +- regrename_do_replace (this_head, best_new_reg); +- tick[best_new_reg] = ++this_tick; +- df_set_regs_ever_live (best_new_reg, true); ++ if (regrename_do_replace (this_head, best_new_reg)) ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); ++ tick[best_new_reg] = ++this_tick; ++ df_set_regs_ever_live (best_new_reg, true); ++ } ++ else ++ { ++ if (dump_file) ++ fprintf (dump_file, ", renaming as %s failed\n", ++ reg_names[best_new_reg]); ++ tick[reg] = ++this_tick; ++ } } } --/* Most ARM cores are single issue, but some newer ones can dual issue. -- The scheduler descriptions rely on this being correct. */ -+/* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the -+ per-core tuning structs. */ - static int - arm_issue_rate (void) +@@ -936,7 +944,13 @@ regrename_analyze (bitmap bb_mask) + bb->aux = NULL; + } + +-void ++/* Attempt to replace all uses of the register in the chain beginning with ++ HEAD with REG. Returns true on success and false if the replacement is ++ rejected because the insns would not validate. The latter can happen ++ e.g. if a match_parallel predicate enforces restrictions on register ++ numbering in its subpatterns. */ ++ ++bool + regrename_do_replace (struct du_head *head, int reg) { -- switch (arm_tune) -- { -- case xgene1: -- return 4; -- -- case cortexa15: -- case cortexa57: -- case exynosm1: -- return 3; -- -- case cortexm7: -- case cortexr4: -- case cortexr4f: -- case cortexr5: -- case genericv7a: -- case cortexa5: -- case cortexa7: -- case cortexa8: -- case cortexa9: -- case cortexa12: -- case cortexa17: -- case cortexa53: -- case fa726te: -- case marvell_pj4: -- return 2; + struct du_chain *chain; +@@ -950,22 +964,26 @@ regrename_do_replace (struct du_head *head, int reg) + int reg_ptr = REG_POINTER (*chain->loc); + + if (DEBUG_INSN_P (chain->insn) && REGNO (*chain->loc) != base_regno) +- INSN_VAR_LOCATION_LOC (chain->insn) = gen_rtx_UNKNOWN_VAR_LOC (); ++ validate_change (chain->insn, &(INSN_VAR_LOCATION_LOC (chain->insn)), ++ gen_rtx_UNKNOWN_VAR_LOC (), true); + else + { +- *chain->loc = gen_raw_REG (GET_MODE (*chain->loc), reg); ++ validate_change (chain->insn, chain->loc, ++ gen_raw_REG (GET_MODE (*chain->loc), reg), true); + if (regno >= FIRST_PSEUDO_REGISTER) + ORIGINAL_REGNO (*chain->loc) = regno; + REG_ATTRS (*chain->loc) = attr; + REG_POINTER (*chain->loc) = reg_ptr; + } - -- default: -- return 1; -- } -+ return current_tune->issue_rate; +- df_insn_rescan (chain->insn); + } + ++ if (!apply_change_group ()) ++ return false; ++ + mode = GET_MODE (*head->first->loc); + head->regno = reg; + head->nregs = hard_regno_nregs[reg][mode]; ++ return true; } - /* Return how many instructions should scheduler lookahead to choose the -@@ -29411,7 +29243,7 @@ arm_gen_setmem (rtx *operands) - static bool - arm_macro_fusion_p (void) - { -- return current_tune->fuseable_ops != ARM_FUSE_NOTHING; -+ return current_tune->fuseable_ops != tune_params::FUSE_NOTHING; + +--- a/src/gcc/regrename.h ++++ b/src/gcc/regrename.h +@@ -91,6 +91,6 @@ extern void regrename_analyze (bitmap); + extern du_head_p regrename_chain_from_id (unsigned int); + extern int find_rename_reg (du_head_p, enum reg_class, HARD_REG_SET *, int, + bool); +-extern void regrename_do_replace (du_head_p, int); ++extern bool regrename_do_replace 
(du_head_p, int); + + #endif +--- a/src/gcc/rtlanal.c ++++ b/src/gcc/rtlanal.c +@@ -104,7 +104,10 @@ generic_subrtx_iterator ::add_single_to_queue (array_type &array, + return base; + } + gcc_checking_assert (i == LOCAL_ELEMS); +- vec_safe_grow (array.heap, i + 1); ++ /* A previous iteration might also have moved from the stack to the ++ heap, in which case the heap array will already be big enough. */ ++ if (vec_safe_length (array.heap) <= i) ++ vec_safe_grow (array.heap, i + 1); + base = array.heap->address (); + memcpy (base, array.stack, sizeof (array.stack)); + base[LOCAL_ELEMS] = x; +--- a/src/gcc/sched-deps.c ++++ b/src/gcc/sched-deps.c +@@ -2856,7 +2856,7 @@ sched_analyze_2 (struct deps_desc *deps, rtx x, rtx_insn *insn) + sched_deps_info->finish_rhs (); } +-/* Try to group two fuseable insns together to prevent scheduler ++/* Try to group two fusible insns together to prevent scheduler + from scheduling them apart. */ -@@ -29432,44 +29264,44 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) - if (!arm_macro_fusion_p ()) - return false; + static void +--- a/src/gcc/simplify-rtx.c ++++ b/src/gcc/simplify-rtx.c +@@ -978,6 +978,32 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + if (GET_CODE (op) == NEG) + return XEXP (op, 0); + ++ /* (neg (x ? (neg y) : y)) == !x ? (neg y) : y. ++ If comparison is not reversible use ++ x ? y : (neg y). */ ++ if (GET_CODE (op) == IF_THEN_ELSE) ++ { ++ rtx cond = XEXP (op, 0); ++ rtx true_rtx = XEXP (op, 1); ++ rtx false_rtx = XEXP (op, 2); ++ ++ if ((GET_CODE (true_rtx) == NEG ++ && rtx_equal_p (XEXP (true_rtx, 0), false_rtx)) ++ || (GET_CODE (false_rtx) == NEG ++ && rtx_equal_p (XEXP (false_rtx, 0), true_rtx))) ++ { ++ if (reversed_comparison_code (cond, NULL_RTX) != UNKNOWN) ++ temp = reversed_comparison (cond, mode); ++ else ++ { ++ temp = cond; ++ std::swap (true_rtx, false_rtx); ++ } ++ return simplify_gen_ternary (IF_THEN_ELSE, mode, ++ mode, temp, true_rtx, false_rtx); ++ } ++ } ++ + /* (neg (plus X 1)) can become (not X). */ + if (GET_CODE (op) == PLUS + && XEXP (op, 1) == const1_rtx) +@@ -1171,7 +1197,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + = (float_truncate:SF foo:DF). -- if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT) -+ if (current_tune->fuseable_ops & tune_params::FUSE_MOVW_MOVT) - { - /* We are trying to fuse -- movw imm / movt imm -- instructions as a group that gets scheduled together. */ -+ movw imm / movt imm -+ instructions as a group that gets scheduled together. */ + (float_truncate:DF (float_extend:XF foo:SF)) +- = (float_extend:SF foo:DF). */ ++ = (float_extend:DF foo:SF). 
*/ + if ((GET_CODE (op) == FLOAT_TRUNCATE + && flag_unsafe_math_optimizations) + || GET_CODE (op) == FLOAT_EXTEND) +@@ -1183,14 +1209,14 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + XEXP (op, 0), mode); - set_dest = SET_DEST (curr_set); + /* (float_truncate (float x)) is (float x) */ +- if (GET_CODE (op) == FLOAT ++ if ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) + && (flag_unsafe_math_optimizations + || (SCALAR_FLOAT_MODE_P (GET_MODE (op)) + && ((unsigned)significand_size (GET_MODE (op)) + >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) + - num_sign_bit_copies (XEXP (op, 0), + GET_MODE (XEXP (op, 0)))))))) +- return simplify_gen_unary (FLOAT, mode, ++ return simplify_gen_unary (GET_CODE (op), mode, + XEXP (op, 0), + GET_MODE (XEXP (op, 0))); - if (GET_MODE (set_dest) != SImode) -- return false; -+ return false; +@@ -1221,7 +1247,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + rounding can't happen. + */ + if (GET_CODE (op) == FLOAT_EXTEND +- || (GET_CODE (op) == FLOAT ++ || ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) + && SCALAR_FLOAT_MODE_P (GET_MODE (op)) + && ((unsigned)significand_size (GET_MODE (op)) + >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) +--- a/src/gcc/stmt.c ++++ b/src/gcc/stmt.c +@@ -342,13 +342,7 @@ parse_output_constraint (const char **constraint_p, int operand_num, + else if (insn_extra_memory_constraint (cn)) + *allows_mem = true; + else +- { +- /* Otherwise we can't assume anything about the nature of +- the constraint except that it isn't purely registers. +- Treat it like "g" and hope for the best. */ +- *allows_reg = true; +- *allows_mem = true; +- } ++ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); + break; + } - /* We are trying to match: -- prev (movw) == (set (reg r0) (const_int imm16)) -- curr (movt) == (set (zero_extract (reg r0) -- (const_int 16) -- (const_int 16)) -- (const_int imm16_1)) -- or -- prev (movw) == (set (reg r1) -- (high (symbol_ref ("SYM")))) -- curr (movt) == (set (reg r0) -- (lo_sum (reg r1) -- (symbol_ref ("SYM")))) */ -+ prev (movw) == (set (reg r0) (const_int imm16)) -+ curr (movt) == (set (zero_extract (reg r0) -+ (const_int 16) -+ (const_int 16)) -+ (const_int imm16_1)) -+ or -+ prev (movw) == (set (reg r1) -+ (high (symbol_ref ("SYM")))) -+ curr (movt) == (set (reg r0) -+ (lo_sum (reg r1) -+ (symbol_ref ("SYM")))) */ - if (GET_CODE (set_dest) == ZERO_EXTRACT) -- { -- if (CONST_INT_P (SET_SRC (curr_set)) -- && CONST_INT_P (SET_SRC (prev_set)) -- && REG_P (XEXP (set_dest, 0)) -- && REG_P (SET_DEST (prev_set)) -- && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) -- return true; -- } -+ { -+ if (CONST_INT_P (SET_SRC (curr_set)) -+ && CONST_INT_P (SET_SRC (prev_set)) -+ && REG_P (XEXP (set_dest, 0)) -+ && REG_P (SET_DEST (prev_set)) -+ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) -+ return true; -+ } - else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM -- && REG_P (SET_DEST (curr_set)) -- && REG_P (SET_DEST (prev_set)) -- && GET_CODE (SET_SRC (prev_set)) == HIGH -- && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) -- return true; -+ && REG_P (SET_DEST (curr_set)) -+ && REG_P (SET_DEST (prev_set)) -+ && GET_CODE (SET_SRC (prev_set)) == HIGH -+ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) -+ return true; - } - return false; - } ---- a/src/gcc/config/arm/arm.h -+++ b/src/gcc/config/arm/arm.h -@@ -1360,46 +1360,6 @@ enum reg_class - ? 
GENERAL_REGS : NO_REGS) \ - : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X))) +@@ -465,13 +459,7 @@ parse_input_constraint (const char **constraint_p, int input_num, + else if (insn_extra_memory_constraint (cn)) + *allows_mem = true; + else +- { +- /* Otherwise we can't assume anything about the nature of +- the constraint except that it isn't purely registers. +- Treat it like "g" and hope for the best. */ +- *allows_reg = true; +- *allows_mem = true; +- } ++ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); + break; + } --/* Try a machine-dependent way of reloading an illegitimate address -- operand. If we find one, push the reload and jump to WIN. This -- macro is used in only one place: `find_reloads_address' in reload.c. -- -- For the ARM, we wish to handle large displacements off a base -- register by splitting the addend across a MOV and the mem insn. -- This can cut the number of reloads needed. */ --#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \ -- do \ -- { \ -- if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \ -- goto WIN; \ -- } \ -- while (0) -- --/* XXX If an HImode FP+large_offset address is converted to an HImode -- SP+large_offset address, then reload won't know how to fix it. It sees -- only that SP isn't valid for HImode, and so reloads the SP into an index -- register, but the resulting address is still invalid because the offset -- is too big. We fix it here instead by reloading the entire address. */ --/* We could probably achieve better results by defining PROMOTE_MODE to help -- cope with the variances between the Thumb's signed and unsigned byte and -- halfword load instructions. */ --/* ??? This should be safe for thumb2, but we may be able to do better. */ --#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ --do { \ -- rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \ -- if (new_x) \ -- { \ -- X = new_x; \ -- goto WIN; \ -- } \ --} while (0) -- --#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ -- if (TARGET_ARM) \ -- ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \ -- else \ -- THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) -- - /* Return the maximum number of consecutive registers - needed to represent mode MODE in a register of class CLASS. - ARM regs are UNITS_PER_WORD bits. -@@ -2096,10 +2056,11 @@ enum arm_auto_incmodes - (current_tune->branch_cost (speed_p, predictable_p)) +--- a/src/gcc/target.def ++++ b/src/gcc/target.def +@@ -1975,7 +1975,7 @@ merging.", + DEFHOOKPOD + (attribute_table, + "If defined, this target hook points to an array of @samp{struct\n\ +-attribute_spec} (defined in @file{tree.h}) specifying the machine\n\ ++attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\ + specific attributes for this target and some of the restrictions on the\n\ + entities to which these attributes are applied and the arguments they\n\ + take.", +--- a/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess1.c ++++ b/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess1.c +@@ -1,6 +1,7 @@ + /* Test -Wsizeof-pointer-memaccess warnings. */ + /* { dg-do compile } */ + /* { dg-options "-Wall -Wno-sizeof-array-argument" } */ ++/* { dg-options "-Wall -Wno-sizeof-array-argument -Wno-c++-compat" { target c } } */ - /* False if short circuit operation is preferred. */ --#define LOGICAL_OP_NON_SHORT_CIRCUIT \ -- ((optimize_size) \ -- ? (TARGET_THUMB ? 
false : true) \ -- : (current_tune->logical_op_non_short_circuit[TARGET_ARM])) -+#define LOGICAL_OP_NON_SHORT_CIRCUIT \ -+ ((optimize_size) \ -+ ? (TARGET_THUMB ? false : true) \ -+ : TARGET_THUMB ? static_cast (current_tune->logical_op_non_short_circuit_thumb) \ -+ : static_cast (current_tune->logical_op_non_short_circuit_arm)) + typedef __SIZE_TYPE__ size_t; + #ifdef __cplusplus +--- a/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c ++++ b/src/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c +@@ -1,6 +1,7 @@ + /* Test -Wsizeof-pointer-memaccess warnings. */ + /* { dg-do compile } */ + /* { dg-options "-Wall -O2 -Wno-sizeof-array-argument" } */ ++/* { dg-options "-Wall -O2 -Wno-sizeof-array-argument -Wno-c++-compat" {target c} } */ - - /* Position Independent Code. */ ---- a/src/gcc/config/arm/arm.md -+++ b/src/gcc/config/arm/arm.md -@@ -1177,9 +1177,9 @@ + #define bos(ptr) __builtin_object_size (ptr, 1) + #define bos0(ptr) __builtin_object_size (ptr, 0) +--- a/src/gcc/testsuite/c-c++-common/pr58346-1.c ++++ b/src/gcc/testsuite/c-c++-common/pr58346-1.c +@@ -1,5 +1,6 @@ + /* PR c/58346 */ + /* { dg-do compile } */ ++/* { dg-options "-Wno-c++-compat" { target c } } */ - ; ??? Check Thumb-2 split length - (define_insn_and_split "*arm_subsi3_insn" -- [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r") -- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n") -- (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))] -+ [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r,r,r,rk,r") -+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,I,r,r,k ,?n") -+ (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r,I,r,r ,r")))] - "TARGET_32BIT" - "@ - sub%?\\t%0, %1, %2 -@@ -2768,6 +2768,55 @@ - (const_string "logic_shift_reg")))] - ) + struct U { + #ifdef __cplusplus +--- a/src/gcc/testsuite/c-c++-common/transparent-union-1.c ++++ b/src/gcc/testsuite/c-c++-common/transparent-union-1.c +@@ -1,4 +1,5 @@ + /* PR c++/51228 */ ++/* { dg-options "-Wno-c++-compat" { target c } } */ -+;; Shifted bics pattern used to set up CC status register and not reusing -+;; bics output. Pattern restricts Thumb2 shift operand as bics for Thumb2 -+;; does not support shift by register. 
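As an illustration of the kind of source the new bics patterns below are
meant to catch (a sketch, not part of the patch; the function name is made
up):

    /* A masked test whose mask operand is shifted.  On ARM this can be
       emitted as a single flag-setting BICS instruction instead of a
       separate bit-clear and compare, when combine forms the matching
       RTL.  */
    int
    all_masked_bits_clear (unsigned int a, unsigned int b)
    {
      return (a & ~(b << 3)) == 0;
    }

As the comment above notes, Thumb-2 has no register-shifted variant here,
which is why the insn condition below checks CONST_INT_P (operands[2]).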
-+(define_insn "andsi_not_shiftsi_si_scc_no_reuse" -+ [(set (reg:CC_NOOV CC_REGNUM) -+ (compare:CC_NOOV -+ (and:SI (not:SI (match_operator:SI 0 "shift_operator" -+ [(match_operand:SI 1 "s_register_operand" "r") -+ (match_operand:SI 2 "arm_rhs_operand" "rM")])) -+ (match_operand:SI 3 "s_register_operand" "r")) -+ (const_int 0))) -+ (clobber (match_scratch:SI 4 "=r"))] -+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" -+ "bic%.%?\\t%4, %3, %1%S0" -+ [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "conds" "set") -+ (set_attr "shift" "1") -+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "logic_shift_imm") -+ (const_string "logic_shift_reg")))] -+) + typedef union {} U __attribute__((transparent_union)); /* { dg-warning "ignored" } */ + +--- a/src/gcc/testsuite/g++.dg/ext/pr57735.C ++++ b/src/gcc/testsuite/g++.dg/ext/pr57735.C +@@ -1,4 +1,7 @@ + /* { dg-do compile { target arm*-*-* } } */ ++/* { dg-require-effective-target arm_arch_v5te_ok } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } {"-mfloat-abi=soft" } } */ + /* { dg-options "-march=armv5te -marm -mtune=xscale -mfloat-abi=soft -O1" } */ + + typedef unsigned int size_t; +--- a/src//dev/null ++++ b/src/gcc/testsuite/g++.dg/tree-ssa/pr66726.c +@@ -0,0 +1,36 @@ ++ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++/* Execution test for converting VIEW_CONVERT_EXPR. */ ++ ++struct cpp_num { ++ bool f; ++}; ++ ++extern cpp_num __attribute__((noinline)) ++foo (cpp_num lhs, ++ cpp_num rhs) ++{ ++ lhs.f = lhs.f || rhs.f; ++ return lhs; ++} ++ ++cpp_num lhs, rhs, r; ++ ++int main () ++{ ++ ++ lhs.f = false; ++ rhs.f = false; ++ r = foo (lhs, rhs); ++ if (r.f) ++ __builtin_abort (); ++ ++ ++ lhs.f = false; ++ rhs.f = true; ++ r = foo (lhs, rhs); ++ if (!r.f) ++ __builtin_abort (); ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.c-torture/compile/pr66168.c +@@ -0,0 +1,15 @@ ++int a, b; ++ ++void ++fn1 () ++{ ++ for (;;) ++ { ++ for (b = 0; b < 3; b++) ++ { ++ char e[2]; ++ char f = e[1]; ++ a ^= f ? 
1 / f : 0; ++ } ++ } ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.c-torture/execute/pr65648.c +@@ -0,0 +1,34 @@ ++/* PR target/65648 */ ++ ++int a = 0, *b = 0, c = 0; ++static int d = 0; ++short e = 1; ++static long long f = 0; ++long long *i = &f; ++unsigned char j = 0; ++ ++__attribute__((noinline, noclone)) void ++foo (int x, int *y) ++{ ++ asm volatile ("" : : "r" (x), "r" (y) : "memory"); ++} ++ ++__attribute__((noinline, noclone)) void ++bar (const char *x, long long y) ++{ ++ asm volatile ("" : : "r" (x), "r" (&y) : "memory"); ++ if (y != 0) ++ __builtin_abort (); ++} ++ ++int ++main () ++{ ++ int k = 0; ++ b = &k; ++ j = (!a) - (c <= e); ++ *i = j; ++ foo (a, &k); ++ bar ("", f); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/Wcxx-compat-22.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Wc++-compat" } */ ++struct A {}; /* { dg-warning "empty struct has size 0 in C" } */ ++union B {}; /* { dg-warning "empty union has size 0 in C" } */ ++struct C { struct D {}; int x; }; /* { dg-warning "empty struct has size 0 in C|declaration does not declare anything" } */ ++struct E { union F {}; int x; }; /* { dg-warning "empty union has size 0 in C|declaration does not declare anything" } */ ++union G { union H {}; int x; }; /* { dg-warning "empty union has size 0 in C|declaration does not declare anything" } */ ++union I { struct J {}; int x; }; /* { dg-warning "empty struct has size 0 in C|declaration does not declare anything" } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/dx-test.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-dx" } */ ++ ++void f(void) ++{} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/loop-8.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ ++void ++f (int *a, int *b) ++{ ++ int i; ++ ++ for (i = 0; i < 100; i++) ++ { ++ int d = 42; ++ ++ a[i] = d; ++ if (i % 2) ++ d = i; ++ b[i] = d; ++ } ++} ++ ++/* Load of 42 is moved out of the loop, introducing a new pseudo register. */ ++/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */ ++/* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */ ++/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/loop-9.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ ++void ++f (double *a) ++{ ++ int i; ++ for (i = 0; i < 100; i++) ++ a[i] = 18.4242; ++} ++ ++/* Load of x is moved out of the loop. */ ++/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */ ++/* { dg-final { scan-rtl-dump "without introducing a new temporary register" "loop2_invariant" } } */ ++/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/loop-invariant.c +@@ -0,0 +1,43 @@ ++/* { dg-do compile { target x86_64-*-* } } */ ++/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */ ++/* NOTE: The target list above could be extended to other targets that have ++ conditional moves, but don't have zero registers. 
*/ ++ ++enum test_type ++{ ++ TYPE0, ++ TYPE1 ++}; ++ ++struct type_node ++{ ++ enum test_type type; ++}; ++ ++struct test_ref ++{ ++ struct type_node *referring; ++}; ++ ++struct test_node ++{ ++ struct test_node *next; ++}; ++ ++int iterate (struct test_node *, unsigned, struct test_ref **); ++ ++int ++loop_invar (struct test_node *node) ++{ ++ struct test_ref *ref; ++ ++ for (unsigned i = 0; iterate (node, i, &ref); i++) ++ if (loop_invar ((ref->referring && ref->referring->type == TYPE0) ++ ? ((struct test_node *) (ref->referring)) : 0)) ++ return 1; ++ ++ return 0; ++} ++ ++/* { dg-final { scan-rtl-dump "Decided to move invariant" "loop2_invariant" } } */ ++/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-1.c +@@ -0,0 +1,6 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++ ++#define EXPN (-6 * (0.5*0.5*0.5*0.5)) ++ ++#include "pow-sqrt.x" +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-2.c +@@ -0,0 +1,5 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++ ++#define EXPN (-5.875) ++#include "pow-sqrt.x" +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-3.c +@@ -0,0 +1,5 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=3" } */ ++ ++#define EXPN (1.25) ++#include "pow-sqrt.x" +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile { target sqrt_insn } } */ ++/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ ++/* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target arm*-*-* } } */ ++ ++double ++foo (double a) ++{ ++ return __builtin_pow (a, -5.875); ++} ++ ++double ++foof (double a) ++{ ++ return __builtin_pow (a, 0.75f); ++} ++ ++double ++bar (double a) ++{ ++ return __builtin_pow (a, 1.0 + 0.00390625); ++} ++ ++double ++baz (double a) ++{ ++ return __builtin_pow (a, -1.25) + __builtin_pow (a, 5.75) - __builtin_pow (a, 3.375); ++} ++ ++#define N 256 ++void ++vecfoo (double *a) ++{ ++ for (int i = 0; i < N; i++) ++ a[i] = __builtin_pow (a[i], 1.25); ++} ++ ++/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ ++/* { dg-final { cleanup-tree-dump "sincos" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pow-sqrt.x +@@ -0,0 +1,30 @@ ++ ++extern void abort (void); ++ ++ ++__attribute__((noinline)) double ++real_pow (double x, double pow_exp) ++{ ++ return __builtin_pow (x, pow_exp); ++} ++ ++#define EPS (0.000000000000000000001) ++ ++#define SYNTH_POW(X, Y) __builtin_pow (X, Y) ++volatile double arg; ++ ++int ++main (void) ++{ ++ double i_arg = 0.1; ++ ++ for (arg = i_arg; arg < 100.0; arg += 1.0) ++ { ++ double synth_res = SYNTH_POW (arg, EXPN); ++ double real_res = real_pow (arg, EXPN); ++ ++ if (__builtin_abs (SYNTH_POW (arg, EXPN) - real_pow (arg, EXPN)) > EPS) ++ abort (); ++ } ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pr49551.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fdata-sections" } */ + -+;; Same as andsi_not_shiftsi_si_scc_no_reuse, but the bics result is also -+;; getting reused later. 
-+(define_insn "andsi_not_shiftsi_si_scc" -+ [(parallel [(set (reg:CC_NOOV CC_REGNUM) -+ (compare:CC_NOOV -+ (and:SI (not:SI (match_operator:SI 0 "shift_operator" -+ [(match_operand:SI 1 "s_register_operand" "r") -+ (match_operand:SI 2 "arm_rhs_operand" "rM")])) -+ (match_operand:SI 3 "s_register_operand" "r")) -+ (const_int 0))) -+ (set (match_operand:SI 4 "s_register_operand" "=r") -+ (and:SI (not:SI (match_op_dup 0 -+ [(match_dup 1) -+ (match_dup 2)])) -+ (match_dup 3)))])] -+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))" -+ "bic%.%?\\t%4, %3, %1%S0" -+ [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "conds" "set") -+ (set_attr "shift" "1") -+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "logic_shift_imm") -+ (const_string "logic_shift_reg")))] -+) ++int x = 1; ++int x; + - (define_insn "*andsi_notsi_si_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV -@@ -5076,7 +5125,7 @@ - - (define_split - [(set (match_operand:SI 0 "s_register_operand" "") -- (ior_xor:SI (and:SI (ashift:SI -+ (IOR_XOR:SI (and:SI (ashift:SI - (match_operand:SI 1 "s_register_operand" "") - (match_operand:SI 2 "const_int_operand" "")) - (match_operand:SI 3 "const_int_operand" "")) -@@ -5088,7 +5137,7 @@ - == (GET_MODE_MASK (GET_MODE (operands[5])) - & (GET_MODE_MASK (GET_MODE (operands[5])) - << (INTVAL (operands[2])))))" -- [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) -+ [(set (match_dup 0) (IOR_XOR:SI (ashift:SI (match_dup 1) (match_dup 2)) - (match_dup 4))) - (set (match_dup 0) (zero_extend:SI (match_dup 5)))] - "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" -@@ -5667,7 +5716,7 @@ - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "length" "4") -- (set_attr "type" "mov_imm")] -+ (set_attr "type" "alu_sreg")] - ) - - (define_insn "*arm_movsi_insn" -@@ -6712,7 +6761,7 @@ - - /* Support only fixed point registers. */ - if (!CONST_INT_P (operands[2]) -- || INTVAL (operands[2]) > 14 -+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS - || INTVAL (operands[2]) < 2 - || !MEM_P (operands[1]) - || !REG_P (operands[0]) -@@ -6737,7 +6786,7 @@ - - /* Support only fixed point registers. 
*/ - if (!CONST_INT_P (operands[2]) -- || INTVAL (operands[2]) > 14 -+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS - || INTVAL (operands[2]) < 2 - || !REG_P (operands[1]) - || !MEM_P (operands[0]) -@@ -6922,7 +6971,7 @@ - [(set_attr "conds" "set") - (set_attr "shift" "1") - (set_attr "arch" "32,a,a") -- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) -+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) - - (define_insn "*cmpsi_shiftsi_swp" - [(set (reg:CC_SWP CC_REGNUM) -@@ -6935,7 +6984,7 @@ - [(set_attr "conds" "set") - (set_attr "shift" "1") - (set_attr "arch" "32,a,a") -- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) -+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) - - (define_insn "*arm_cmpsi_negshiftsi_si" - [(set (reg:CC_Z CC_REGNUM) -@@ -7528,10 +7577,10 @@ - (const_string "mov_imm") - (const_string "mov_reg")) - (const_string "mvn_imm") -- (const_string "mov_reg") -- (const_string "mov_reg") -- (const_string "mov_reg") -- (const_string "mov_reg")])] -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*movsfcc_soft_insn" -@@ -7884,7 +7933,7 @@ - ) - - (define_expand "return" -- [(returns)] -+ [(RETURNS)] - "(TARGET_ARM || (TARGET_THUMB2 - && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL - && !IS_STACKALIGN (arm_current_func_type ()))) -@@ -7922,7 +7971,7 @@ - [(set (pc) - (if_then_else (match_operator 0 "arm_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) -- (returns) -+ (RETURNS) - (pc)))] - "TARGET_ARM " - "* -@@ -7945,7 +7994,7 @@ - (if_then_else (match_operator 0 "arm_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) - (pc) -- (returns)))] -+ (RETURNS)))] - "TARGET_ARM " - "* - { -@@ -8279,7 +8328,7 @@ - - (define_insn "*_multsi" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") -- (shiftable_ops:SI -+ (SHIFTABLE_OPS:SI - (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") - (match_operand:SI 3 "power_of_two_operand" "")) - (match_operand:SI 1 "s_register_operand" "rk,")))] -@@ -8293,7 +8342,7 @@ - - (define_insn "*_shiftsi" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") -- (shiftable_ops:SI -+ (SHIFTABLE_OPS:SI - (match_operator:SI 2 "shift_nomul_operator" - [(match_operand:SI 3 "s_register_operand" "r,r,r") - (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) -@@ -8689,7 +8738,14 @@ - return \"\"; - " - [(set_attr "conds" "use") -- (set_attr "type" "mov_reg,mov_reg,multiple") -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "multiple")]) - (set_attr "length" "4,4,8")] - ) - -@@ -9485,8 +9541,8 @@ - (const_string "alu_imm" ) - (const_string "alu_sreg")) - (const_string "alu_imm") -- (const_string "alu_sreg") -- (const_string "alu_sreg")])] -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_plus" -@@ -9523,7 +9579,13 @@ - sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,4,8,8") -- (set_attr "type" "alu_sreg,alu_imm,multiple,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "alu_imm" ) -+ (const_string "alu_sreg")) -+ (const_string "alu_imm") -+ 
(const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_arith_arith" -@@ -9618,7 +9680,11 @@ - %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,8") -- (set_attr "type" "alu_shift_reg,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "alu_shift_imm" ) -+ (const_string "alu_shift_reg")) -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_arith" -@@ -9679,7 +9745,11 @@ - %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,8") -- (set_attr "type" "alu_shift_reg,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "alu_shift_imm" ) -+ (const_string "alu_shift_reg")) -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_not" -@@ -9786,7 +9856,12 @@ - [(set_attr "conds" "use") - (set_attr "shift" "2") - (set_attr "length" "4,8,8") -- (set_attr "type" "mov_shift_reg,multiple,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "mov_shift" ) -+ (const_string "mov_shift_reg")) -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_move_shift" -@@ -9824,7 +9899,12 @@ - [(set_attr "conds" "use") - (set_attr "shift" "2") - (set_attr "length" "4,8,8") -- (set_attr "type" "mov_shift_reg,multiple,multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 3 "const_int_operand" "") -+ (const_string "mov_shift" ) -+ (const_string "mov_shift_reg")) -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*ifcompare_shift_shift" -@@ -10905,7 +10985,7 @@ - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "length" "4") -- (set_attr "type" "mov_imm")] -+ (set_attr "type" "alu_sreg")] - ) - - (define_insn "*arm_rev" ---- a/src/gcc/config/arm/iterators.md -+++ b/src/gcc/config/arm/iterators.md -@@ -181,39 +181,53 @@ - ;; compare a second time. 
- (define_code_iterator LTUGEU [ltu geu]) - -+;; The signed gt, ge comparisons -+(define_code_iterator GTGE [gt ge]) ++/* { dg-final { scan-assembler-not {comm[\t ]+x} } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/pr67043.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fcompare-debug -w" } */ ++ ++extern void rt_mutex_owner (void); ++extern void rt_mutex_deadlock_account_lock (int); ++extern void signal_pending (void); ++__typeof__ (int *) a; ++int b; ++ ++int ++try_to_take_rt_mutex (int p1) { ++ rt_mutex_owner (); ++ if (b) ++ return 0; ++ rt_mutex_deadlock_account_lock (p1); ++ return 1; ++} ++ ++void ++__rt_mutex_slowlock (int p1) { ++ int c; ++ for (;;) { ++ c = ({ ++ asm ("" : "=r"(a)); ++ a; ++ }); ++ if (try_to_take_rt_mutex (c)) ++ break; ++ if (__builtin_expect (p1 == 0, 0)) ++ signal_pending (); ++ } ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/torture/pr66076.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "" } */ ++/* { dg-options "-mno-prefer-avx128 -march=bdver4" { target i?86-*-* x86_64-*-* } } */ + -+;; The unsigned gt, ge comparisons -+(define_code_iterator GTUGEU [gtu geu]) ++void ++f0a (char *result, char *arg1, char *arg4, char temp_6) ++{ ++ int idx = 0; ++ for (idx = 0; idx < 416; idx += 1) ++ result[idx] = (arg1[idx] + arg4[idx]) * temp_6; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr64130.c +@@ -0,0 +1,18 @@ + -+;; Comparisons for vc -+(define_code_iterator COMPARISONS [eq gt ge le lt]) ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-vrp1" } */ + - ;; A list of ... --(define_code_iterator ior_xor [ior xor]) -+(define_code_iterator IOR_XOR [ior xor]) - - ;; Operations on two halves of a quadword vector. --(define_code_iterator vqh_ops [plus smin smax umin umax]) -+(define_code_iterator VQH_OPS [plus smin smax umin umax]) - - ;; Operations on two halves of a quadword vector, - ;; without unsigned variants (for use with *SFmode pattern). --(define_code_iterator vqhs_ops [plus smin smax]) -+(define_code_iterator VQHS_OPS [plus smin smax]) - - ;; A list of widening operators - (define_code_iterator SE [sign_extend zero_extend]) ++int funsigned (unsigned a) ++{ ++ return 0x1ffffffffL / a == 0; ++} ++ ++int funsigned2 (unsigned a) ++{ ++ if (a < 1) return 1; ++ return (-1 * 0x1ffffffffL) / a == 0; ++} ++ ++/* { dg-final { scan-tree-dump ": \\\[2, 8589934591\\\]" "vrp1" } } */ ++/* { dg-final { scan-tree-dump ": \\\[-8589934591, -2\\\]" "vrp1" } } */ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c +@@ -0,0 +1,54 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ ++ ++void foo (double *p) ++{ ++ int i; ++ for (i = -20000; i < 200000; i+= 40) ++ { ++ p[i+0] = 1.0; ++ p[i+1] = 1.0; ++ p[i+2] = 1.0; ++ p[i+3] = 1.0; ++ p[i+4] = 1.0; ++ p[i+5] = 1.0; ++ p[i+6] = 1.0; ++ p[i+7] = 1.0; ++ p[i+8] = 1.0; ++ p[i+9] = 1.0; ++ p[i+10] = 1.0; ++ p[i+11] = 1.0; ++ p[i+12] = 1.0; ++ p[i+13] = 1.0; ++ p[i+14] = 1.0; ++ p[i+15] = 1.0; ++ p[i+16] = 1.0; ++ p[i+17] = 1.0; ++ p[i+18] = 1.0; ++ p[i+19] = 1.0; ++ p[i+20] = 1.0; ++ p[i+21] = 1.0; ++ p[i+22] = 1.0; ++ p[i+23] = 1.0; ++ p[i+24] = 1.0; ++ p[i+25] = 1.0; ++ p[i+26] = 1.0; ++ p[i+27] = 1.0; ++ p[i+28] = 1.0; ++ p[i+29] = 1.0; ++ p[i+30] = 1.0; ++ p[i+31] = 1.0; ++ p[i+32] = 1.0; ++ p[i+33] = 1.0; ++ p[i+34] = 1.0; ++ p[i+35] = 1.0; ++ p[i+36] = 1.0; ++ p[i+37] = 1.0; ++ p[i+38] = 1.0; ++ p[i+39] = 1.0; ++ } ++} ++ ++/* We should groups address type IV uses. 
*/ ++/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */ ++/* { dg-final { cleanup-tree-dump "ivopts" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr66726-2.c +@@ -0,0 +1,19 @@ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt1-details" } */ ++ ++extern void bar (char, char); ++int ++foo (char b) ++{ ++ char a; ++ a = b; ++ b = 'b'; ++ bar (a, b); ++ b = a; ++ if (b == 0) ++ a++; ++ return a + b; ++} ++ ++/* { dg-final { scan-tree-dump-times "factor conversion out" 0 "phiopt1" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr66726.c +@@ -0,0 +1,15 @@ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt1-details" } */ ++ ++extern unsigned short mode_size[]; ++ ++int ++oof (int mode) ++{ ++ return (64 < mode_size[mode] ? 64 : mode_size[mode]); ++} ++ ++/* { dg-final { scan-tree-dump-times "factor conversion out" 1 "phiopt1" } } */ ++/* { dg-final { scan-tree-dump-times "MIN_EXPR" 1 "phiopt1" } } */ ++ +--- a/src/gcc/testsuite/gcc.dg/vect/pr59354.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr59354.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-additional-options "-O3" } */ - ;; Right shifts --(define_code_iterator rshifts [ashiftrt lshiftrt]) -+(define_code_iterator RSHIFTS [ashiftrt lshiftrt]) + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/pr64252.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64252.c +@@ -1,6 +1,5 @@ + /* PR target/64252 */ + /* Test correctness of size 3 store groups permutation. */ +-/* { dg-do run } */ + /* { dg-additional-options "-O3" } */ + /* { dg-additional-options "-mavx" { target avx_runtime } } */ - ;; Iterator for integer conversions - (define_code_iterator FIXUORS [fix unsigned_fix]) +--- a/src/gcc/testsuite/gcc.dg/vect/pr64404.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64404.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-additional-options "--param=sccvn-max-alias-queries-per-access=1" } */ - ;; Binary operators whose second operand can be shifted. --(define_code_iterator shiftable_ops [plus minus ior xor and]) -+(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and]) + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/pr64493.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64493.c +@@ -1,5 +1,3 @@ +-/* { dg-do run } */ +- + #include "tree-vect.h" --;; plus and minus are the only shiftable_ops for which Thumb2 allows -+;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows - ;; a stack pointer opoerand. The minus operation is a candidate for an rsub - ;; and hence only plus is supported. - (define_code_attr t2_binop0 - [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) + int a, b, c, d, e, f, g, h; +--- a/src/gcc/testsuite/gcc.dg/vect/pr64495.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64495.c +@@ -1,5 +1,3 @@ +-/* { dg-do run } */ +- + #include + #include "tree-vect.h" --;; The instruction to use when a shiftable_ops has a shift operation as -+;; The instruction to use when a SHIFTABLE_OPS has a shift operation as - ;; its first operand. 
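As a reminder of what the arith_shift_insn mapping that follows is used for
(an illustrative sketch, not part of the patch; the function name is made
up): together with SHIFTABLE_OPS it lets a single pattern template cover
ALU operations where one operand is a shifted register, so code like the
following can be emitted as one instruction such as add r0, r1, r2, lsl #2.

    /* An add whose second operand is a shifted register.  */
    unsigned int
    add_scaled (unsigned int a, unsigned int b)
    {
      return a + (b << 2);
    }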
- (define_code_attr arith_shift_insn - [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) +--- a/src/gcc/testsuite/gcc.dg/vect/pr64844.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr64844.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-require-effective-target vect_double } */ + /* { dg-additional-options "-ffast-math" } */ -+(define_code_attr cmp_op [(eq "eq") (gt "gt") (ge "ge") (lt "lt") (le "le") -+ (gtu "gt") (geu "ge")]) -+ -+(define_code_attr cmp_type [(eq "i") (gt "s") (ge "s") (lt "s") (le "s")]) -+ - ;;---------------------------------------------------------------------------- - ;; Int iterators - ;;---------------------------------------------------------------------------- -@@ -221,6 +235,10 @@ - (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM - UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) +--- a/src/gcc/testsuite/gcc.dg/vect/pr65518.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr65518.c +@@ -1,5 +1,3 @@ +-/* { dg-do run } */ +- + extern void abort (void); + + typedef struct giga +--- a/src/gcc/testsuite/gcc.dg/vect/vect-aggressive-1.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-aggressive-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-require-effective-target vect_condition } */ + /* { dg-require-effective-target vect_simd_clones } */ + /* { dg-additional-options "-fopenmp-simd" } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c +@@ -12,6 +12,8 @@ -+(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE]) -+ -+(define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT]) + vf2_t vf2 = (vf2_t){ 17.f, 18.f }; + vi4_t vi4 = (vi4_t){ 0xdeadbabe, 0xbabecafe, 0xcafebeef, 0xbeefdead }; ++vlf1_t vlf1 = (vlf1_t) { 17.0 }; + - (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) - - (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM -@@ -677,6 +695,11 @@ + union int128_t qword; - ]) + int *int_ptr = (int *)0xabcdef0123456789ULL; +@@ -41,4 +43,5 @@ FUNC_VAL_CHECK (11, long double, 98765432123456789.987654321L, Q0, flat) + FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64) + FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128) + FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat) ++FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat) + #endif +--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h ++++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h +@@ -10,6 +10,9 @@ typedef float vf4_t __attribute__((vector_size (16))); + /* 128-bit vector of 4 ints. */ + typedef int vi4_t __attribute__((vector_size (16))); -+(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt") -+ (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") -+ (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") -+ (UNSPEC_VCAGT "gt")]) ++/* 128-bit vector of 1 quad precision float. */ ++typedef long double vlf1_t __attribute__((vector_size (16))); + - (define_int_attr r [ - (UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r") - (UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "") -@@ -774,7 +797,7 @@ - (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) + /* signed quad-word (in an union for the convenience of initialization). 
*/ + union int128_t + { +--- a/src/gcc/testsuite/gcc.target/aarch64/abs_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/abs_1.c +@@ -7,15 +7,14 @@ extern void abort (void); + long long + abs64 (long long a) + { +- /* { dg-final { scan-assembler "eor\t" } } */ +- /* { dg-final { scan-assembler "sub\t" } } */ ++ /* { dg-final { scan-assembler "csneg\t" } } */ + return llabs (a); + } - ;; Both kinds of return insn. --(define_code_iterator returns [return simple_return]) -+(define_code_iterator RETURNS [return simple_return]) - (define_code_attr return_str [(return "") (simple_return "simple_")]) - (define_code_attr return_simple_p [(return "false") (simple_return "true")]) - (define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") ---- a/src/gcc/config/arm/iwmmxt.md -+++ b/src/gcc/config/arm/iwmmxt.md -@@ -107,8 +107,8 @@ - ) + long long + abs64_in_dreg (long long a) + { +- /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */ ++ /* { dg-final { scan-assembler "csneg\t" } } */ + register long long x asm ("d8") = a; + register long long y asm ("d9"); + asm volatile ("" : : "w" (x)); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp +@@ -27,14 +27,26 @@ load_lib gcc-dg.exp - (define_insn "*iwmmxt_arm_movdi" -- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv") -- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))] -+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv") -+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))] - "TARGET_REALLY_IWMMXT - && ( register_operand (operands[0], DImode) - || register_operand (operands[1], DImode))" ---- a/src/gcc/config/arm/linux-eabi.h -+++ b/src/gcc/config/arm/linux-eabi.h -@@ -77,6 +77,23 @@ - %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ - %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + # Initialize `dg'. + load_lib c-torture.exp +-load_lib target-supports.exp +-load_lib torture-options.exp -+/* For ARM musl currently supports four dynamic linkers: -+ - ld-musl-arm.so.1 - for the EABI-derived soft-float ABI -+ - ld-musl-armhf.so.1 - for the EABI-derived hard-float ABI -+ - ld-musl-armeb.so.1 - for the EABI-derived soft-float ABI, EB -+ - ld-musl-armebhf.so.1 - for the EABI-derived hard-float ABI, EB -+ musl does not support the legacy OABI mode. -+ All the dynamic linkers live in /lib. -+ We default to soft-float, EL. */ -+#undef MUSL_DYNAMIC_LINKER -+#if TARGET_BIG_ENDIAN_DEFAULT -+#define MUSL_DYNAMIC_LINKER_E "%{mlittle-endian:;:eb}" -+#else -+#define MUSL_DYNAMIC_LINKER_E "%{mbig-endian:eb}" -+#endif -+#define MUSL_DYNAMIC_LINKER \ -+ "/lib/ld-musl-arm" MUSL_DYNAMIC_LINKER_E "%{mfloat-abi=hard:hf}.so.1" + dg-init + +-if {[istarget arm*-*-*] +- && ![check_effective_target_arm_neon_ok]} then { +- return ++# The default action for a test is 'run'. Save current default. ++global dg-do-what-default ++set save-dg-do-what-default ${dg-do-what-default} + - /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to - use the GNU/Linux version, not the generic BPABI version. 
*/ - #undef LINK_SPEC -@@ -107,6 +124,7 @@ - - #undef ENDFILE_SPEC - #define ENDFILE_SPEC \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) ++# For ARM, make sure that we have a target compatible with NEON, and do ++# not attempt to run execution tests if the hardware doesn't support it. ++if {[istarget arm*-*-*]} then { ++ if {![check_effective_target_arm_neon_ok]} then { ++ return ++ } ++ if {![is-effective-target arm_neon_hw]} then { ++ set dg-do-what-default compile ++ } else { ++ set dg-do-what-default run ++ } ++} else { ++ set dg-do-what-default run + } - /* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we ---- a/src/gcc/config/arm/neon.md -+++ b/src/gcc/config/arm/neon.md -@@ -1114,7 +1114,7 @@ - ;; lshrdi3_neon - (define_insn_and_split "di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") -- (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") -+ (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) - (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) -@@ -1194,71 +1194,6 @@ - [(set_attr "type" "neon_add_widen")] - ) + torture-init +@@ -44,22 +56,10 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS + set additional_flags [add_options_for_arm_neon ""] --;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit --;; shift-count granularity. That's good enough for the middle-end's current --;; needs. -- --;; Note that it's not safe to perform such an operation in big-endian mode, --;; due to element-ordering issues. -- --(define_expand "vec_shr_" -- [(match_operand:VDQ 0 "s_register_operand" "") -- (match_operand:VDQ 1 "s_register_operand" "") -- (match_operand:SI 2 "const_multiple_of_8_operand" "")] -- "TARGET_NEON && !BYTES_BIG_ENDIAN" --{ -- rtx zero_reg; -- HOST_WIDE_INT num_bits = INTVAL (operands[2]); -- const int width = GET_MODE_BITSIZE (mode); -- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; -- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = -- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; -- -- if (num_bits == width) -- { -- emit_move_insn (operands[0], operands[1]); -- DONE; -- } -- -- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); -- operands[0] = gen_lowpart (bvecmode, operands[0]); -- operands[1] = gen_lowpart (bvecmode, operands[1]); -- -- emit_insn (gen_ext (operands[0], operands[1], zero_reg, -- GEN_INT (num_bits / BITS_PER_UNIT))); -- DONE; --}) -- --(define_expand "vec_shl_" -- [(match_operand:VDQ 0 "s_register_operand" "") -- (match_operand:VDQ 1 "s_register_operand" "") -- (match_operand:SI 2 "const_multiple_of_8_operand" "")] -- "TARGET_NEON && !BYTES_BIG_ENDIAN" --{ -- rtx zero_reg; -- HOST_WIDE_INT num_bits = INTVAL (operands[2]); -- const int width = GET_MODE_BITSIZE (mode); -- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; -- rtx (*gen_ext) (rtx, rtx, rtx, rtx) = -- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; -- -- if (num_bits == 0) -- { -- emit_move_insn (operands[0], CONST0_RTX (mode)); -- DONE; + # Main loop. +-foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] { +- # If we're only testing specific files and this isn't one of them, skip it. 
+- if ![runtest_file_p $runtests $src] then { +- continue - } - -- num_bits = width - num_bits; -- -- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); -- operands[0] = gen_lowpart (bvecmode, operands[0]); -- operands[1] = gen_lowpart (bvecmode, operands[1]); -- -- emit_insn (gen_ext (operands[0], zero_reg, operands[1], -- GEN_INT (num_bits / BITS_PER_UNIT))); -- DONE; --}) -- - ;; Helpers for quad-word reduction operations +- # runtest_file_p is already run above, and the code below can run +- # runtest_file_p again, make sure everything for this test is +- # performed if the above runtest_file_p decided this runtest +- # instance should execute the test +- gcc_parallel_test_enable 0 +- c-torture-execute $src $additional_flags +- gcc-dg-runtest $src "" $additional_flags +- gcc_parallel_test_enable 1 +-} ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ ++ "" ${additional_flags} - ; Add (or smin, smax...) the low N/2 elements of the N-element vector -@@ -1267,7 +1202,7 @@ + # All done. ++set dg-do-what-default ${save-dg-do-what-default} + torture-finish + dg-finish +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +@@ -235,7 +235,8 @@ extern ARRAY(expected, hfloat, 64, 2); + + typedef union { + struct { +- int _xxx:25; ++ int _xxx:24; ++ unsigned int FZ:1; + unsigned int DN:1; + unsigned int AHP:1; + unsigned int QC:1; +@@ -258,7 +259,8 @@ typedef union { + unsigned int QC:1; + unsigned int AHP:1; + unsigned int DN:1; +- int _dnm:25; ++ unsigned int FZ:1; ++ int _dnm:24; + } b; + unsigned int word; + } _ARM_FPSCR; +@@ -395,10 +397,15 @@ static void clean_results (void) + #if defined(__aarch64__) + /* On AArch64, make sure to return DefaultNaN to have the same + results as on AArch32. */ +- _ARM_FPSCR _afpscr_for_dn; +- asm volatile ("mrs %0,fpcr" : "=r" (_afpscr_for_dn)); +- _afpscr_for_dn.b.DN = 1; +- asm volatile ("msr fpcr,%0" : : "r" (_afpscr_for_dn)); ++ _ARM_FPSCR _afpscr; ++ asm volatile ("mrs %0,fpcr" : "=r" (_afpscr)); ++ _afpscr.b.DN = 1; ++ ++ /* On AArch64, make sure to flush to zero by default, as on ++ AArch32. */ ++ _afpscr.b.FZ = 1; ++ ++ asm volatile ("msr fpcr,%0" : : "r" (_afpscr)); + #endif + } - (define_insn "quad_halves_v4si" - [(set (match_operand:V2SI 0 "s_register_operand" "=w") -- (vqh_ops:V2SI -+ (VQH_OPS:V2SI - (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1)])) - (vec_select:V2SI (match_dup 1) -@@ -1280,7 +1215,7 @@ +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc +@@ -55,7 +55,22 @@ void FNNAME (INSN_NAME) (void) + /* Apply a binary operator named INSN_NAME. 
*/ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_BINARY_OP, INSN_NAME); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc +@@ -76,7 +76,22 @@ void FNNAME (INSN_NAME) (void) + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_cumulative_sat, ""); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat, ""); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc +@@ -57,7 +57,12 @@ void FNNAME (INSN_NAME) (void) + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc +@@ -60,7 +60,12 @@ void FNNAME (INSN_NAME) (void) + TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ 
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + } - (define_insn "quad_halves_v4sf" - [(set (match_operand:V2SF 0 "s_register_operand" "=w") -- (vqhs_ops:V2SF -+ (VQHS_OPS:V2SF - (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1)])) - (vec_select:V2SF (match_dup 1) -@@ -1293,7 +1228,7 @@ + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc +@@ -60,7 +60,12 @@ void FNNAME (INSN_NAME) (void) + TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + } - (define_insn "quad_halves_v8hi" - [(set (match_operand:V4HI 0 "s_register_operand" "+w") -- (vqh_ops:V4HI -+ (VQH_OPS:V4HI - (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3)])) -@@ -1308,7 +1243,7 @@ + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c +@@ -7,16 +7,10 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9, + 0xfa, 0xfb, 0xfc, 0xfd }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56, + 0x57, 0x58, 0x59, 0x5a }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61, + 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, +@@ -24,8 +18,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61, + VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f, + 0xba0, 0xba1, 0xba2, 0xba3 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff, + 0x0, 0x1, 0x2, 0x3, +@@ -33,16 +25,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc, + 0xfffd, 0xfffe, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; 
+-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VABA/VABAQ" + void exec_vaba (void) +@@ -132,7 +114,18 @@ void exec_vaba (void) + TEST_VABA(q, uint, u, 16, 8); + TEST_VABA(q, uint, u, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + } - (define_insn "quad_halves_v16qi" - [(set (match_operand:V8QI 0 "s_register_operand" "+w") -- (vqh_ops:V8QI -+ (VQH_OPS:V8QI - (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3) -@@ -2200,134 +2135,140 @@ - [(set_attr "type" "neon_sub_halve_narrow_q")] - ) + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c +@@ -3,45 +3,15 @@ + #include "compute-ref-data.h" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff6, 0xfff7, 0xfff8, 0xfff9, + 0xfffa, 0xfffb, 0xfffc, 0xfffd }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x16, 0x17, 0x18, 0x19 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x20, 0x21 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x53, 0x54, 0x55, 0x56, + 0x57, 0x58, 0x59, 0x5a }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x907, 0x908, 0x909, 0x90a }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7, + 0xffffffe8 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for cases 
with input values chosen to test + possible intermediate overflow. */ +@@ -121,7 +91,12 @@ void exec_vabal (void) + TEST_VABAL(uint, u, 16, 32, 4); + TEST_VABAL(uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + /* Use values that could lead to overflow intermediate + * calculations. */ +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c +@@ -8,15 +8,10 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x10, 0xf, 0xe, + 0xd, 0xc, 0xb, 0xa }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x2, 0x1, 0x0 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x17 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, + 0xf3, 0xf4, 0xf5, 0xf6 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe8, 0xffffffe9 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x41c26666, 0x41ba6666 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17, + 0x16, 0x15, 0x14, 0x13, +@@ -25,8 +20,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x3, 0x2, 0x1, + 0x0, 0x1, 0x2, 0x3 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x30, 0x2f, 0x2e, 0x2d }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, +@@ -35,14 +28,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffe4, 0xffe5, 0xffe6, 0xffe7, + 0xffe8, 0xffe9, 0xffea, 0xffeb }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1, + 0xffffffd2, 0xffffffd3 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42407ae1, 0x423c7ae1, + 0x42387ae1, 0x42347ae1 }; + +@@ -130,7 +115,20 @@ void exec_vabd (void) + TEST_VABD(q, uint, u, 32, 4); + TEST_VABD(q, float, f, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ 
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); + + + /* Extra FP tests with special values (-0.0, ....) */ +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c +@@ -3,45 +3,15 @@ + #include "compute-ref-data.h" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x11, 0x10, 0xf, 0xe, + 0xd, 0xc, 0xb, 0xa }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x2, 0x1, 0x0 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x18, 0x17 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, + 0xf3, 0xf4, 0xf5, 0xf6 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe8, + 0xffffffe9 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VABDL" + void exec_vabdl (void) +@@ -99,7 +69,12 @@ void exec_vabdl (void) + TEST_VABDL(uint, u, 16, 32, 4); + TEST_VABDL(uint, u, 32, 64, 2); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + } --(define_insn "neon_vceq" -- [(set (match_operand: 0 "s_register_operand" "=w,w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w,w") -- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] -- UNSPEC_VCEQ))] -+;; These may expand to an UNSPEC pattern when a floating point mode is used -+;; without unsafe math optimizations. 
-+(define_expand "neon_vc" -+ [(match_operand: 0 "s_register_operand" "=w,w") -+ (neg: -+ (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w") -+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))] - "TARGET_NEON" -- "@ -- vceq.\t%0, %1, %2 -- vceq.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] -+ { -+ /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations -+ are enabled. */ -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT -+ && !flag_unsafe_math_optimizations) -+ { -+ /* We don't just emit a gen_neon_vc_insn_unspec because -+ we define gen_neon_vceq_insn_unspec only for float modes -+ whereas this expander iterates over the integer modes as well, -+ but we will never expand to UNSPECs for the integer comparisons. */ -+ switch (mode) -+ { -+ case V2SFmode: -+ emit_insn (gen_neon_vcv2sf_insn_unspec (operands[0], -+ operands[1], -+ operands[2])); -+ break; -+ case V4SFmode: -+ emit_insn (gen_neon_vcv4sf_insn_unspec (operands[0], -+ operands[1], -+ operands[2])); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ else -+ emit_insn (gen_neon_vc_insn (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+ } - ) + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c +@@ -12,41 +12,11 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for 
float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c +@@ -18,10 +18,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x5, 0x6, 0x7, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xe, 0xf, 0x10, 0x11 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x19 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, +@@ -40,14 +36,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff4, 0xfff5, 0xfff6, + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x27, 0x28, 0x29, 0x2a }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, + 0xfffffffffffffff4 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c +@@ -6,46 +6,16 @@ + #define TEST_MSG "VADDL" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, + 0xffe7, 0xffe8, 0xffe9, 0xffea }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, + 0xffffffe4, 0xffffffe5 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, + 0xffffffffffffffe1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e3, 0x1e4, 0x1e5, 0x1e6, + 0x1e7, 0x1e8, 0x1e9, 0x1ea }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe2, + 0x1ffe3, 0x1ffe4 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe0, 0x1ffffffe1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXl.inc" +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c +@@ -6,46 +6,16 @@ + #define TEST_MSG "VADDW" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, + 0xffe7, 0xffe8, 0xffe9, 0xffea }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, + 0xffffffe4, 0xffffffe5 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, + 0xffffffffffffffe1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 0xea }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe1, 0xffe2, + 0xffe3, 0xffe4 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe0, 0xffffffe1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXw.inc" +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0x10, 0x10, 0x10, 0x10, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x12, 0x12 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf2, 0xf2, + 0xf4, 0xf4, 0xf6, 0xf6, + 0xf0, 0xf0, 0xf2, 0xf2, +@@ -35,11 +31,3 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x0, 0x1, 0x2, 0x3 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x30, 0x31, 0x32, 0x33 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c ++++ 
b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe1, 0xe2, 0xe3, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe0, 0xffe1, 0xffe0, 0xffe1 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd0, 0xffffffd1 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x0, 0x1, + 0x0, 0x1, 0x0, 0x1, + 0x8, 0x9, 0x8, 0x9, +@@ -36,11 +32,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc0, + 0xffffffc0, 0xffffffc0 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c +@@ -4,48 +4,9 @@ + #include "cmp_fp_op.inc" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 
0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c +@@ -4,47 +4,9 @@ + #include "cmp_fp_op.inc" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c +@@ -4,46 +4,8 @@ + #include "cmp_fp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c +@@ -4,46 +4,8 @@ + #include "cmp_fp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; + VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c +@@ -8,29 +8,9 @@ void exec_vceq_p8(void); + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -38,16 +18,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, + 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c +@@ -4,29 +4,9 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -34,16 +14,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xffff, 0xffff }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, + 0xff, 0xff, 0xff, 0xff }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c +@@ -4,29 +4,9 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -34,16 +14,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xffff }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0xff, 0xff, 0xff, 0xff }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c +@@ -4,30 +4,10 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x0 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, +@@ -36,16 +16,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcls.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcls.c +@@ -6,16 +6,6 @@ + VECT_VAR_DECL(expected,int,8,8) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x7, 0x7, 0x7, 0x7, + 0x7, 0x7, 0x7, 0x7, + 0x7, 0x7, 0x7, 0x7, +@@ -23,45 +13,12 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x7, 0x7, 0x7, 0x7, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x14, 0x14, 0x14, 0x14 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 
0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with negative input. */ + VECT_VAR_DECL(expected_with_negative,int,8,8) [] = { 0x7, 0x7, 0x7, 0x7, + 0x7, 0x7, 0x7, 0x7 }; + VECT_VAR_DECL(expected_with_negative,int,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; + VECT_VAR_DECL(expected_with_negative,int,32,2) [] = { 0x1, 0x1 }; +-VECT_VAR_DECL(expected_with_negative,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,uint,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,hfloat,32,2) [] = { 0x33333333, +- 0x33333333 }; + VECT_VAR_DECL(expected_with_negative,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -69,32 +26,6 @@ VECT_VAR_DECL(expected_with_negative,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected_with_negative,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected_with_negative,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +-VECT_VAR_DECL(expected_with_negative,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,uint,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_with_negative,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_negative,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_negative,poly,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_negative,hfloat,32,4) [] = { 0x33333333, +- 0x33333333, +- 0x33333333, +- 0x33333333 }; + + #define INSN_NAME vcls + #define TEST_MSG "VCLS/VCLSQ" +@@ -146,7 +77,13 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + +- CHECK_RESULTS (TEST_MSG, " (positive input)"); ++#define MSG_POSITIVE " (positive 
input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, MSG_POSITIVE); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, MSG_POSITIVE); + + /* Fill input vector with arbitrary values (negative). */ + VDUP(vector, , int, s, 8, 8, 0xFF); +@@ -164,7 +101,13 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_with_negative, " (negative input)"); ++#define MSG_NEGATIVE " (negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_with_negative, MSG_NEGATIVE); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_with_negative, MSG_NEGATIVE); + } --(define_insn "neon_vcge" -+(define_insn "neon_vc_insn" - [(set (match_operand: 0 "s_register_operand" "=w,w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w,w") -- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] -- UNSPEC_VCGE))] -- "TARGET_NEON" -- "@ -- vcge.\t%0, %1, %2 -- vcge.\t%0, %1, #0" -+ (neg: -+ (COMPARISONS: -+ (match_operand:VDQW 1 "s_register_operand" "w,w") -+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))] -+ "TARGET_NEON && !(GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT -+ && !flag_unsafe_math_optimizations)" -+ { -+ char pattern[100]; -+ sprintf (pattern, "vc.%s%%#\t%%0," -+ " %%1, %s", -+ GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT -+ ? "f" : "", -+ which_alternative == 0 -+ ? "%2" : "#0"); -+ output_asm_insn (pattern, operands); -+ return ""; + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c +@@ -4,30 +4,10 @@ + #include "cmp_op.inc" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333, +- 0x333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, +@@ -35,16 +15,6 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c +@@ -6,36 +6,18 @@ + VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x3, 0x3, 0x3 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x11, 0x11 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0x5, 0x5 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x3, 0x3, 0x3 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3, 0x3, 
0x3, 0x3, 0x3, 0x3, 0x3, 0x3, + 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd, 0xd, 0xd, 0xd, + 0xd, 0xd, 0xd, 0xd }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1f, 0x1f, 0x1f, 0x1f }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + + /* Expected results with input=0. */ +@@ -43,16 +25,10 @@ VECT_VAR_DECL(expected_with_0,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8 }; + VECT_VAR_DECL(expected_with_0,int,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,int,32,2) [] = { 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_with_0,uint,8,8) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8 }; + VECT_VAR_DECL(expected_with_0,uint,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,uint,32,2) [] = { 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_0,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_0,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_0,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_with_0,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, +@@ -60,8 +36,6 @@ VECT_VAR_DECL(expected_with_0,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + VECT_VAR_DECL(expected_with_0,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,int,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_with_0,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, + 0x8, 0x8, 0x8, 0x8, +@@ -69,16 +43,6 @@ VECT_VAR_DECL(expected_with_0,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8, + VECT_VAR_DECL(expected_with_0,uint,16,8) [] = { 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10 }; + VECT_VAR_DECL(expected_with_0,uint,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +-VECT_VAR_DECL(expected_with_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_with_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_with_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_with_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define INSN_NAME vclz + #define TEST_MSG "VCLZ/VCLZQ" +@@ -154,7 +118,18 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ 
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + + /* Test with zero as input. */ + VDUP(vector, , int, s, 8, 8, 0); +@@ -184,7 +159,19 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_with_0, " (input=0)"); ++#define MSG_ZERO " (input=0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_with_0, MSG_ZERO); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_with_0, MSG_ZERO); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c +@@ -4,37 +4,14 @@ + + /* Expected results. */ + VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, + 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, + 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 
+- 0x33333333, 0x33333333 }; + + #define INSN_NAME vcnt + #define TEST_MSG "VCNT/VCNTQ" +@@ -86,7 +63,12 @@ FNNAME (INSN_NAME) + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, poly, p, 8, 16); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c +@@ -3,20 +3,6 @@ + #include "compute-ref-data.h" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0x11, 0x11, 0x11, 0x11, +@@ -88,7 +74,17 @@ void exec_vcombine (void) + TEST_VCOMBINE(poly, p, 16, 4, 8); + TEST_VCOMBINE(float, f, 32, 2, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c +@@ -17,34 +17,6 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, + 0x78, 0x56, 0x34, 0x12 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 
0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define INSN_NAME vcreate + #define TEST_MSG "VCREATE" +@@ -113,7 +85,17 @@ FNNAME (INSN_NAME) + TEST_VCREATE(poly, p, 8, 8); + TEST_VCREATE(poly, p, 16, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe4, 0xe5, 0xe6, 0xe7, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffee, 0xffef, 0xffec, 0xffed }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd8, 0xffffffd9 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x6, 0x7, 0x4, 0x5, + 0x2, 0x3, 0x0, 0x1, + 0xe, 0xf, 0xc, 0xd, +@@ -37,11 +33,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc7, 0xffffffc6, + 0xffffffc5, 0xffffffc4 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, + 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c +@@ -17,34 +17,6 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 
0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VGET_HIGH" + void exec_vget_high (void) +@@ -76,7 +48,17 @@ void exec_vget_high (void) + TEST_VGET_HIGH(poly, p, 16, 4, 8); + TEST_VGET_HIGH(float, f, 32, 2, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + } + + int main (void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c +@@ -0,0 +1,125 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected results. */ ++int8_t expected_s8 = 0xf7; ++int16_t expected_s16 = 0xfff3; ++int32_t expected_s32 = 0xfffffff1; ++int64_t expected_s64 = 0xfffffffffffffff0; ++uint8_t expected_u8 = 0xf6; ++uint16_t expected_u16 = 0xfff2; ++uint32_t expected_u32 = 0xfffffff1; ++uint64_t expected_u64 = 0xfffffffffffffff0; ++poly8_t expected_p8 = 0xf6; ++poly16_t expected_p16 = 0xfff2; ++hfloat32_t expected_f32 = 0xc1700000; ++ ++int8_t expectedq_s8 = 0xff; ++int16_t expectedq_s16 = 0xfff5; ++int32_t expectedq_s32 = 0xfffffff3; ++int64_t expectedq_s64 = 0xfffffffffffffff1; ++uint8_t expectedq_u8 = 0xfe; ++uint16_t expectedq_u16 = 0xfff6; ++uint32_t expectedq_u32 = 0xfffffff2; ++uint64_t expectedq_u64 = 0xfffffffffffffff1; ++poly8_t expectedq_p8 = 0xfe; ++poly16_t expectedq_p16 = 0xfff6; ++hfloat32_t expectedq_f32 = 0xc1500000; ++ ++int error_found = 0; ++ ++#define TEST_MSG "VGET_LANE" ++void exec_vget_lane (void) ++{ ++ /* vec=vget_lane(vec, lane), then store the result. 
*/ ++#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ ++ VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ ++ if (VAR(var, T1, W) != expected##Q##_##T2##W) { \ ++ fprintf(stderr, \ ++ "ERROR in %s (%s line %d in result '%s') at type %s " \ ++ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \ ++ TEST_MSG, __FILE__, __LINE__, \ ++ STR(expected##Q##_##T2##W), \ ++ STR(VECT_NAME(T1, W, N)), \ ++ VAR(var, T1, W), \ ++ expected##Q##_##T2##W); \ ++ error_found = 1; \ + } - [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -+ (if_then_else (match_operand 2 "zero_operand") - (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] --) -- --(define_insn "neon_vcgeu" -- [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQIW 1 "s_register_operand" "w") -- (match_operand:VDQIW 2 "s_register_operand" "w")] -- UNSPEC_VCGEU))] -- "TARGET_NEON" -- "vcge.u%#\t%0, %1, %2" -- [(set_attr "type" "neon_compare")] -+ (const_string "neon_compare")))] - ) ++ ++ /* Special variant for floating-point. */ ++ union { ++ uint32_t var_int32; ++ float32_t var_float32; ++ } var_int32_float32; ++ ++#define TEST_VGET_LANE_FP(Q, T1, T2, W, N, L) \ ++ VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ ++ var_int##W##_float##W.var_float##W = VAR(var, T1, W); \ ++ if (var_int##W##_float##W.var_int##W != expected##Q##_##T2##W) { \ ++ fprintf(stderr, \ ++ "ERROR in %s (%s line %d in result '%s') at type %s " \ ++ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \ ++ TEST_MSG, __FILE__, __LINE__, \ ++ STR(expected##Q##_##T2##W), \ ++ STR(VECT_NAME(T1, W, N)), \ ++ var_int##W##_float##W.var_int##W, \ ++ expected##Q##_##T2##W); \ ++ error_found = 1; \ ++ } ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ ++ /* Scalar variables. */ ++ VAR_DECL(var, int, 8); ++ VAR_DECL(var, int, 16); ++ VAR_DECL(var, int, 32); ++ VAR_DECL(var, int, 64); ++ VAR_DECL(var, uint, 8); ++ VAR_DECL(var, uint, 16); ++ VAR_DECL(var, uint, 32); ++ VAR_DECL(var, uint, 64); ++ VAR_DECL(var, poly, 8); ++ VAR_DECL(var, poly, 16); ++ VAR_DECL(var, float, 32); ++ ++ /* Initialize input values. */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); ++ ++ /* Choose lane arbitrarily. 
*/ ++ TEST_VGET_LANE(, int, s, 8, 8, 7); ++ TEST_VGET_LANE(, int, s, 16, 4, 3); ++ TEST_VGET_LANE(, int, s, 32, 2, 1); ++ TEST_VGET_LANE(, int, s, 64, 1, 0); ++ TEST_VGET_LANE(, uint, u, 8, 8, 6); ++ TEST_VGET_LANE(, uint, u, 16, 4, 2); ++ TEST_VGET_LANE(, uint, u, 32, 2, 1); ++ TEST_VGET_LANE(, uint, u, 64, 1, 0); ++ TEST_VGET_LANE(, poly, p, 8, 8, 6); ++ TEST_VGET_LANE(, poly, p, 16, 4, 2); ++ TEST_VGET_LANE_FP(, float, f, 32, 2, 1); ++ ++ TEST_VGET_LANE(q, int, s, 8, 16, 15); ++ TEST_VGET_LANE(q, int, s, 16, 8, 5); ++ TEST_VGET_LANE(q, int, s, 32, 4, 3); ++ TEST_VGET_LANE(q, int, s, 64, 2, 1); ++ TEST_VGET_LANE(q, uint, u, 8, 16, 14); ++ TEST_VGET_LANE(q, uint, u, 16, 8, 6); ++ TEST_VGET_LANE(q, uint, u, 32, 4, 2); ++ TEST_VGET_LANE(q, uint, u, 64, 2, 1); ++ TEST_VGET_LANE(q, poly, p, 8, 16, 14); ++ TEST_VGET_LANE(q, poly, p, 16, 8, 6); ++ TEST_VGET_LANE_FP(q, float, f, 32, 4, 3); ++} ++ ++int main (void) ++{ ++ exec_vget_lane (); ++ ++ if (error_found) ++ abort(); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c +@@ -17,34 +17,6 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #define TEST_MSG "VGET_LOW" + void exec_vget_low (void) +@@ -76,7 +48,17 @@ void exec_vget_low (void) + TEST_VGET_LOW(poly, p, 16, 4, 8); + TEST_VGET_LOW(float, f, 32, 2, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + } --(define_insn "neon_vcgt" -+(define_insn "neon_vc_insn_unspec" - [(set 
(match_operand: 0 "s_register_operand" "=w,w") - (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w,w") -- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")] -- UNSPEC_VCGT))] -+ [(match_operand:VCVTF 1 "s_register_operand" "w,w") -+ (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")] -+ NEON_VCMP))] - "TARGET_NEON" -- "@ -- vcgt.\t%0, %1, %2 -- vcgt.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] -+ { -+ char pattern[100]; -+ sprintf (pattern, "vc.f%%#\t%%0," -+ " %%1, %s", -+ which_alternative == 0 -+ ? "%2" : "#0"); -+ output_asm_insn (pattern, operands); -+ return ""; + int main (void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x2x2_t ++f_vld2_lane_f32 (float32_t * p, float32x2x2_t v) ++{ ++ float32x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x1x2_t ++f_vld2_lane_f64 (float64_t * p, float64x1x2_t v) ++{ ++ float64x1x2_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++poly8x8x2_t ++f_vld2_lane_p8 (poly8_t * p, poly8x8x2_t v) ++{ ++ poly8x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x4x2_t ++f_vld2_lane_s16 (int16_t * p, int16x4x2_t v) ++{ ++ int16x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ 
++ ++int32x2x2_t ++f_vld2_lane_s32 (int32_t * p, int32x2x2_t v) ++{ ++ int32x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x1x2_t ++f_vld2_lane_s64 (int64_t * p, int64x1x2_t v) ++{ ++ int64x1x2_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int8x8x2_t ++f_vld2_lane_s8 (int8_t * p, int8x8x2_t v) ++{ ++ int8x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x4x2_t ++f_vld2_lane_u16 (uint16_t * p, uint16x4x2_t v) ++{ ++ uint16x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x2x2_t ++f_vld2_lane_u32 (uint32_t * p, uint32x2x2_t v) ++{ ++ uint32x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x1x2_t ++f_vld2_lane_u64 (uint64_t * p, uint64x1x2_t v) ++{ ++ uint64x1x2_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u8_indices_1.c +@@ 
-0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint8x8x2_t ++f_vld2_lane_u8 (uint8_t * p, uint8x8x2_t v) ++{ ++ uint8x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x4x2_t ++f_vld2q_lane_f32 (float32_t * p, float32x4x2_t v) ++{ ++ float32x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x2x2_t ++f_vld2q_lane_f64 (float64_t * p, float64x2x2_t v) ++{ ++ float64x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_p8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++poly8x16x2_t ++f_vld2q_lane_p8 (poly8_t * p, poly8x16x2_t v) ++{ ++ poly8x16x2_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x8x2_t ++f_vld2q_lane_s16 (int16_t * p, int16x8x2_t v) ++{ ++ int16x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x4x2_t ++f_vld2q_lane_s32 (int32_t * p, int32x4x2_t v) ++{ ++ int32x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 
3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x2x2_t ++f_vld2q_lane_s64 (int64_t * p, int64x2x2_t v) ++{ ++ int64x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int8x16x2_t ++f_vld2q_lane_s8 (int8_t * p, int8x16x2_t v) ++{ ++ int8x16x2_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x8x2_t ++f_vld2q_lane_u16 (uint16_t * p, uint16x8x2_t v) ++{ ++ uint16x8x2_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x4x2_t ++f_vld2q_lane_u32 (uint32_t * p, uint32x4x2_t v) ++{ ++ uint32x4x2_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x2x2_t ++f_vld2q_lane_u64 (uint64_t * p, uint64x2x2_t v) ++{ ++ uint64x2x2_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ 
++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint8x16x2_t ++f_vld2q_lane_u8 (uint8_t * p, uint8x16x2_t v) ++{ ++ uint8x16x2_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld2q_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x2x3_t ++f_vld3_lane_f32 (float32_t * p, float32x2x3_t v) ++{ ++ float32x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x1x3_t ++f_vld3_lane_f64 (float64_t * p, float64x1x3_t v) ++{ ++ float64x1x3_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++poly8x8x3_t ++f_vld3_lane_p8 (poly8_t * p, poly8x8x3_t v) ++{ ++ poly8x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x4x3_t ++f_vld3_lane_s16 (int16_t * p, int16x4x3_t v) ++{ ++ int16x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x2x3_t ++f_vld3_lane_s32 (int32_t * p, int32x2x3_t v) ++{ ++ int32x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ 
b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x1x3_t ++f_vld3_lane_s64 (int64_t * p, int64x1x3_t v) ++{ ++ int64x1x3_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int8x8x3_t ++f_vld3_lane_s8 (int8_t * p, int8x8x3_t v) ++{ ++ int8x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x4x3_t ++f_vld3_lane_u16 (uint16_t * p, uint16x4x3_t v) ++{ ++ uint16x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x2x3_t ++f_vld3_lane_u32 (uint32_t * p, uint32x2x3_t v) ++{ ++ uint32x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x1x3_t ++f_vld3_lane_u64 (uint64_t * p, uint64x1x3_t v) ++{ ++ uint64x1x3_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint8x8x3_t ++f_vld3_lane_u8 (uint8_t * p, uint8x8x3_t v) ++{ ++ uint8x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u8 
(p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x4x3_t ++f_vld3q_lane_f32 (float32_t * p, float32x4x3_t v) ++{ ++ float32x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x2x3_t ++f_vld3q_lane_f64 (float64_t * p, float64x2x3_t v) ++{ ++ float64x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_p8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++poly8x16x3_t ++f_vld3q_lane_p8 (poly8_t * p, poly8x16x3_t v) ++{ ++ poly8x16x3_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x8x3_t ++f_vld3q_lane_s16 (int16_t * p, int16x8x3_t v) ++{ ++ int16x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x4x3_t ++f_vld3q_lane_s32 (int32_t * p, int32x4x3_t v) ++{ ++ int32x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { 
xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x2x3_t ++f_vld3q_lane_s64 (int64_t * p, int64x2x3_t v) ++{ ++ int64x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int8x16x3_t ++f_vld3q_lane_s8 (int8_t * p, int8x16x3_t v) ++{ ++ int8x16x3_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x8x3_t ++f_vld3q_lane_u16 (uint16_t * p, uint16x8x3_t v) ++{ ++ uint16x8x3_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x4x3_t ++f_vld3q_lane_u32 (uint32_t * p, uint32x4x3_t v) ++{ ++ uint32x4x3_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x2x3_t ++f_vld3q_lane_u64 (uint64_t * p, uint64x2x3_t v) ++{ ++ uint64x2x3_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint8x16x3_t ++f_vld3q_lane_u8 (uint8_t * p, uint8x16x3_t v) ++{ ++ uint8x16x3_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld3q_lane_u8 (p, v, 
-1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++float32x2x4_t ++f_vld4_lane_f32 (float32_t * p, float32x2x4_t v) ++{ ++ float32x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x1x4_t ++f_vld4_lane_f64 (float64_t * p, float64x1x4_t v) ++{ ++ float64x1x4_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++poly8x8x4_t ++f_vld4_lane_p8 (poly8_t * p, poly8x8x4_t v) ++{ ++ poly8x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x4x4_t ++f_vld4_lane_s16 (int16_t * p, int16x4x4_t v) ++{ ++ int16x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x2x4_t ++f_vld4_lane_s32 (int32_t * p, int32x2x4_t v) ++{ ++ int32x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x1x4_t ++f_vld4_lane_s64 (int64_t * p, int64x1x4_t v) ++{ ++ int64x1x4_t res; ++ /* { dg-error "lane 1 out of 
range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int8x8x4_t ++f_vld4_lane_s8 (int8_t * p, int8x8x4_t v) ++{ ++ int8x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_s8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint16x4x4_t ++f_vld4_lane_u16 (uint16_t * p, uint16x4x4_t v) ++{ ++ uint16x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint32x2x4_t ++f_vld4_lane_u32 (uint32_t * p, uint32x2x4_t v) ++{ ++ uint32x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++uint64x1x4_t ++f_vld4_lane_u64 (uint64_t * p, uint64x1x4_t v) ++{ ++ uint64x1x4_t res; ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++uint8x8x4_t ++f_vld4_lane_u8 (uint8_t * p, uint8x8x4_t v) ++{ ++ uint8x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4_lane_u8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* 
} } */ ++ ++float32x4x4_t ++f_vld4q_lane_f32 (float32_t * p, float32x4x4_t v) ++{ ++ float32x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f32 (p, v, -1); ++ return res; +} -+ [(set_attr "type" "neon_fp_compare_s")] - ) - --(define_insn "neon_vcgtu" -+(define_insn "neon_vcu" - [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQIW 1 "s_register_operand" "w") -- (match_operand:VDQIW 2 "s_register_operand" "w")] -- UNSPEC_VCGTU))] -+ (neg: -+ (GTUGEU: -+ (match_operand:VDQIW 1 "s_register_operand" "w") -+ (match_operand:VDQIW 2 "s_register_operand" "w"))))] - "TARGET_NEON" -- "vcgt.u%#\t%0, %1, %2" -+ "vc.u%#\t%0, %1, %2" - [(set_attr "type" "neon_compare")] - ) - --;; VCLE and VCLT only support comparisons with immediate zero (register --;; variants are VCGE and VCGT with operands reversed). -- --(define_insn "neon_vcle" -- [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "zero_operand" "Dz")] -- UNSPEC_VCLE))] -- "TARGET_NEON" -- "vcle.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] --) -- --(define_insn "neon_vclt" -- [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: -- [(match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "zero_operand" "Dz")] -- UNSPEC_VCLT))] -+(define_expand "neon_vca" -+ [(set (match_operand: 0 "s_register_operand") -+ (neg: -+ (GTGE: -+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand")) -+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))] - "TARGET_NEON" -- "vclt.\t%0, %1, #0" -- [(set (attr "type") -- (if_then_else (match_test "") -- (const_string "neon_fp_compare_s") -- (if_then_else (match_operand 2 "zero_operand") -- (const_string "neon_compare_zero") -- (const_string "neon_compare"))))] -+ { -+ if (flag_unsafe_math_optimizations) -+ emit_insn (gen_neon_vca_insn (operands[0], operands[1], -+ operands[2])); -+ else -+ emit_insn (gen_neon_vca_insn_unspec (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+ } - ) - --(define_insn "neon_vcage" -+(define_insn "neon_vca_insn" - [(set (match_operand: 0 "s_register_operand" "=w") -- (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") -- (match_operand:VCVTF 2 "s_register_operand" "w")] -- UNSPEC_VCAGE))] -- "TARGET_NEON" -- "vacge.\t%0, %1, %2" -+ (neg: -+ (GTGE: -+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")) -+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))] -+ "TARGET_NEON && flag_unsafe_math_optimizations" -+ "vac.\t%0, %1, %2" - [(set_attr "type" "neon_fp_compare_s")] - ) - --(define_insn "neon_vcagt" -+(define_insn "neon_vca_insn_unspec" - [(set (match_operand: 0 "s_register_operand" "=w") - (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") - (match_operand:VCVTF 2 "s_register_operand" "w")] -- UNSPEC_VCAGT))] -+ NEON_VACMP))] - "TARGET_NEON" -- "vacgt.\t%0, %1, %2" -+ "vac.\t%0, %1, %2" - [(set_attr "type" "neon_fp_compare_s")] - ) - ---- a/src/gcc/config/arm/thumb2.md -+++ b/src/gcc/config/arm/thumb2.md -@@ -300,7 +300,7 @@ - ldr%?\\t%0, %1 - str%?\\t%1, %0 - str%?\\t%1, %0" -- [(set_attr "type" 
"mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") -+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load1,load1,store1,store1") - (set_attr "length" "2,4,2,4,4,4,4,4,4") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") -@@ -486,12 +486,12 @@ - ) - - (define_insn_and_split "*thumb2_movsicc_insn" -- [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r") -+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r") - (if_then_else:SI - (match_operator 3 "arm_comparison_operator" - [(match_operand 4 "cc_register" "") (const_int 0)]) -- (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r") -- (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))] -+ (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,I ,r,rI,K ,K,r") -+ (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,I,K ,rI,K,r")))] - "TARGET_THUMB2" - "@ - it\\t%D3\;mov%D3\\t%0, %2 -@@ -504,12 +504,14 @@ - # - # - # -+ # - #" - ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 -- ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 -- ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 -- ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 -- ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 -+ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 -+ ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 -+ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 -+ ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 -+ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 - "&& reload_completed" - [(const_int 0)] - { -@@ -540,10 +542,30 @@ - operands[2]))); - DONE; - } -- [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") -- (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") -+ [(set_attr "length" "4,4,6,6,6,6,10,8,10,10,10,6") -+ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,no,yes") - (set_attr "conds" "use") -- (set_attr "type" "multiple")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "mvn_imm") -+ (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "mvn_imm") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple") -+ (const_string "multiple")])] - ) - - (define_insn "*thumb2_movsfcc_soft_insn" -@@ -1182,7 +1204,11 @@ - " - [(set_attr "predicable" "yes") - (set_attr "length" "2") -- (set_attr "type" "alu_sreg")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "alu_imm") -+ (const_string "alu_sreg")) -+ (const_string "alu_imm")])] - ) - - (define_insn "*thumb2_subsi_short" -@@ -1247,14 +1273,21 @@ - " - [(set_attr "conds" "set") - (set_attr "length" "2,2,4") -- (set_attr "type" "alu_sreg")] -+ (set_attr_alternative "type" -+ [(if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "alus_imm") -+ (const_string "alus_sreg")) -+ (const_string "alus_imm") -+ (if_then_else (match_operand 2 "const_int_operand" "") -+ (const_string "alus_imm") -+ 
(const_string "alus_sreg"))])] - ) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++float64x2x4_t ++f_vld4q_lane_f64 (float64_t * p, float64x2x4_t v) ++{ ++ float64x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_f64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_p8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++poly8x16x4_t ++f_vld4q_lane_p8 (poly8_t * p, poly8x16x4_t v) ++{ ++ poly8x16x4_t res; ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_p8 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s16_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int16x8x4_t ++f_vld4q_lane_s16 (int16_t * p, int16x8x4_t v) ++{ ++ int16x8x4_t res; ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s16 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s32_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++int32x4x4_t ++f_vld4q_lane_s32 (int32_t * p, int32x4x4_t v) ++{ ++ int32x4x4_t res; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s32 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s64_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++int64x2x4_t ++f_vld4q_lane_s64 (int64_t * p, int64x2x4_t v) ++{ ++ int64x2x4_t res; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ res = vld4q_lane_s64 (p, v, -1); ++ return res; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s8_indices_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ 
++int8x16x4_t
++f_vld4q_lane_s8 (int8_t * p, int8x16x4_t v)
++{
++ int8x16x4_t res;
++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_s8 (p, v, 16);
++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_s8 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u16_indices_1.c
+@@ -0,0 +1,16 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++
++uint16x8x4_t
++f_vld4q_lane_u16 (uint16_t * p, uint16x8x4_t v)
++{
++ uint16x8x4_t res;
++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u16 (p, v, 8);
++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u16 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u32_indices_1.c
+@@ -0,0 +1,16 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++
++uint32x4x4_t
++f_vld4q_lane_u32 (uint32_t * p, uint32x4x4_t v)
++{
++ uint32x4x4_t res;
++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u32 (p, v, 4);
++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u32 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u64_indices_1.c
+@@ -0,0 +1,17 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++/* { dg-skip-if "" { arm*-*-* } } */
++
++uint64x2x4_t
++f_vld4q_lane_u64 (uint64_t * p, uint64x2x4_t v)
++{
++ uint64x2x4_t res;
++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u64 (p, v, 2);
++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u64 (p, v, -1);
++ return res;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u8_indices_1.c
+@@ -0,0 +1,17 @@
++#include <arm_neon.h>
++
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
++/* { dg-excess-errors "" { xfail arm*-*-* } } */
++/* { dg-skip-if "" { arm*-*-* } } */
++
++uint8x16x4_t
++f_vld4q_lane_u8 (uint8_t * p, uint8x16x4_t v)
++{
++ uint8x16x4_t res;
++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u8 (p, v, 16);
++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */
++ res = vld4q_lane_u8 (p, v, -1);
++ return res;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
+@@ -27,8 +27,6 @@ VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+ 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+ VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+-VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333,
+- 0x3333333333333333 };
+ VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb,
+@@ -37,8 +35,6 @@ VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -71,8 +67,6 @@ VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -81,8 +75,6 @@ VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -115,8 +107,6 @@ VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -125,8 +115,6 @@ VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -159,8 +147,6 @@ VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -169,8 +155,6 @@ VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -206,8 +190,6 @@ VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -216,8 +198,6 @@ 
VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -252,8 +232,6 @@ VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -262,8 +240,6 @@ VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -296,8 +272,6 @@ VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -306,8 +280,6 @@ VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, + 0xfffc, 0xfffd, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, + 0xfffffff6, 0xfffffff7 }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, +@@ -340,8 +312,6 @@ VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -350,8 +320,6 @@ VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; + VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, +@@ -384,8 +352,6 @@ VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; + VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xfffffffc, 0xfffffffd, + 0xfffffffe, 0xffffffff }; +-VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, +@@ -394,8 +360,6 @@ 
VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; + VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xfffffffc, 0xfffffffd, + 0xfffffffe, 0xffffffff }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, +@@ -500,6 +464,32 @@ void exec_vldX (void) + TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + ++ /* vldX supports all vector types except [u]int64x2. */ ++#define CHECK_RESULTS_VLDX(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ \ ++ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ ++ } \ ++ + DECL_ALL_VLDX(2); + DECL_ALL_VLDX(3); + DECL_ALL_VLDX(4); +@@ -650,39 +640,39 @@ void exec_vldX (void) + clean_results (); + #define TEST_MSG "VLD2/VLD2Q" + TEST_ALL_VLDX(2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, "chunk 0"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, "chunk 1"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_1, "chunk 1"); + + /* Check vld3/vld3q. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD3/VLD3Q" + TEST_ALL_VLDX(3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, "chunk 0"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, "chunk 1"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(3, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, "chunk 2"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_2, "chunk 2"); + + /* Check vld4/vld4q. 
*/ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD4/VLD4Q" + TEST_ALL_VLDX(4); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, "chunk 0"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, "chunk 1"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(4, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, "chunk 2"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_2, "chunk 2"); + + TEST_ALL_EXTRA_CHUNKS(4, 3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, "chunk 3"); ++ CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_3, "chunk 3"); + } - (define_insn "*thumb2_addsi3_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV -- (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r") -- (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r")) -+ (plus:SI (match_operand:SI 0 "s_register_operand" "l, r") -+ (match_operand:SI 1 "arm_add_operand" "lPv,rIL")) - (const_int 0)))] - "TARGET_THUMB2" - "* -@@ -1271,8 +1304,10 @@ - return \"cmn\\t%0, %1\"; - " - [(set_attr "conds" "set") -- (set_attr "length" "2,2,4,4") -- (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_sreg")] -+ (set_attr "length" "2,4") -+ (set (attr "type") (if_then_else (match_operand 1 "const_int_operand" "") -+ (const_string "alus_imm") -+ (const_string "alus_sreg")))] - ) + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c +@@ -19,34 +19,6 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; + VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld2_dup/chunk 1. 
*/ + VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, +@@ -64,34 +36,6 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1, + 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld3_dup/chunk 0. */ + VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, +@@ -111,34 +55,6 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, + VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff0 }; + VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld3_dup/chunk 1. 
*/ + VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, +@@ -158,34 +74,6 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, + VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2, + 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 }; +-VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld3_dup/chunk 2. */ + VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, +@@ -205,34 +93,6 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, + VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0, + 0xfff1, 0xfff2 }; + VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 }; +-VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk 0. 
*/ + VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -250,34 +110,6 @@ VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk 1. */ + VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -294,34 +126,6 @@ VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk 2. 
*/ + VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -338,34 +142,6 @@ VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* vld4_dup/chunk3. */ + VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -382,33 +158,6 @@ VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + void exec_vldX_dup (void) + { +@@ -478,6 +227,21 @@ void exec_vldX_dup (void) + TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y) + ++ 
/* vldX_dup supports only 64-bit inputs. */ ++#define CHECK_RESULTS_VLDX_DUP(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ } \ + + DECL_ALL_VLDX_DUP(2); + DECL_ALL_VLDX_DUP(3); +@@ -629,39 +393,39 @@ void exec_vldX_dup (void) + clean_results (); + #define TEST_MSG "VLD2_DUP/VLD2Q_DUP" + TEST_ALL_VLDX_DUP(2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, "chunk 0"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld2_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, "chunk 1"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld2_1, "chunk 1"); + + /* Check vld3_dup/vld3q_dup. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD3_DUP/VLD3Q_DUP" + TEST_ALL_VLDX_DUP(3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, "chunk 0"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld3_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, "chunk 1"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld3_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(3, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, "chunk 2"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld3_2, "chunk 2"); + + /* Check vld4_dup/vld4q_dup */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD4_DUP/VLD4Q_DUP" + TEST_ALL_VLDX_DUP(4); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, "chunk 0"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_0, "chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, "chunk 1"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_1, "chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(4, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, "chunk 2"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_2, "chunk 2"); + + TEST_ALL_EXTRA_CHUNKS(4, 3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, "chunk 3"); ++ CHECK_RESULTS_VLDX_DUP (TEST_MSG, expected_vld4_3, "chunk 3"); + } - (define_insn "*thumb2_mulsi_short" ---- a/src/gcc/config/arm/unknown-elf.h -+++ b/src/gcc/config/arm/unknown-elf.h -@@ -32,7 +32,9 @@ - #define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c +@@ -9,42 +9,24 @@ VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa }; + 
VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -55,40 +37,22 @@ VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xf0, 0xf1 }; + VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 
0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, +@@ -99,40 +63,22 @@ VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -143,40 +89,22 @@ VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; + VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xf0, 0xf1, 0xf2, 0xaa }; + VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xf0, 0xf1, 0xf2, 0xaa }; + VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 
0xc1600000, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; + VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; + VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -187,40 +115,22 @@ VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xf0, 0xf1, 0xf2 }; + VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; + VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; + VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, + 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff2, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa, 
+@@ -231,40 +141,22 @@ VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +-VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -275,40 +167,22 @@ VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +-VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa 
}; +-VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -319,40 +193,22 @@ VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, +@@ -363,40 +219,22 @@ VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xf0, 0xf1, 0xf2, 0xf3 }; + VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = 
{ 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; + VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa }; +-VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, + 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, +@@ -542,6 +380,26 @@ void exec_vldX_lane (void) + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + ++ /* vldX_lane supports only a subset of all variants. */ ++#define CHECK_RESULTS_VLDX_LANE(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ ++ } \ ++ + /* Declare the temporary buffers / variables. 
*/ + DECL_ALL_VLDX_LANE(2); + DECL_ALL_VLDX_LANE(3); +@@ -568,39 +426,39 @@ void exec_vldX_lane (void) + clean_results (); + #define TEST_MSG "VLD2_LANE/VLD2Q_LANE" + TEST_ALL_VLDX_LANE(2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, " chunk 0"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld2_0, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, " chunk 1"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld2_1, " chunk 1"); + + /* Check vld3_lane/vld3q_lane. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD3_LANE/VLD3Q_LANE" + TEST_ALL_VLDX_LANE(3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, " chunk 0"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_0, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, " chunk 1"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_1, " chunk 1"); + + TEST_ALL_EXTRA_CHUNKS(3, 2); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, " chunk 2"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_2, " chunk 2"); + + /* Check vld4_lane/vld4q_lane. */ + clean_results (); + #undef TEST_MSG + #define TEST_MSG "VLD4_LANE/VLD4Q_LANE" + TEST_ALL_VLDX_LANE(4); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, " chunk 0"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_0, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, " chunk 1"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_1, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, " chunk 2"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_2, " chunk 2"); + + TEST_ALL_EXTRA_CHUNKS(4, 3); +- CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, " chunk 3"); ++ CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_3, " chunk 3"); + } - #undef STARTFILE_SPEC --#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC -+#define STARTFILE_SPEC \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ -+ UNKNOWN_ELF_STARTFILE_SPEC + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c +@@ -7,15 +7,12 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x1, 0x12, 0x23, + 0x34, 0x45, 0x56, 0x67 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0xfde0, 0xfe02, 0xfe24, 0xfe46 }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffcd0, 0xfffffd03 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0x4, 0x48, 0x8c, + 0xd0, 0x14, 0x58, 0x9c }; + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc, + 0xd0, 0x94, 0x58, 0x1c }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5, + 0x6c, 0xe3, 0x5a, 0xd1, +@@ -25,8 +22,6 @@ VECT_VAR_DECL(expected,int,16,8) [] = { 0xf780, 0xf808, 0xf890, 0xf918, + 0xf9a0, 0xfa28, 0xfab0, 0xfb38 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff670, 0xfffff709, + 0xfffff7a2, 0xfffff83b }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0xa, 0xb4, 0x5e, 
+ 0x8, 0xb2, 0x5c, 0x6, + 0xb0, 0x5a, 0x4, 0xae, +@@ -35,14 +30,10 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf450, 0xf50b, 0xf5c6, 0xf681, + 0xf73c, 0xf7f7, 0xf8b2, 0xf96d }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff340, 0xfffff40c, + 0xfffff4d8, 0xfffff5a4 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e, + 0xc8, 0x62, 0x9c, 0x36, + 0x30, 0x9a, 0x64, 0xce, + 0x98, 0x32, 0xcc, 0x66 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; + VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000, + 0xc4ae4ccd, 0xc4a1d999 }; + +@@ -145,7 +136,22 @@ void FNNAME (INSN_NAME) (void) + TEST_VMUL(INSN_NAME, q, poly, p, 8, 16); + TEST_VMUL(INSN_NAME, q, float, f, 32, 4); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); + } - #define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s" + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c +@@ -12,41 +12,11 @@ VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; 
+-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfb, 0xfb, 0xfb, 0xfb, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff3, 0xfff3 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff7, 0xfffffff7 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffd }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xf9, 0xfb, 0xfb, + 0xfd, 0xfd, 0xff, 0xff, + 0xf9, 0xf9, 0xfb, 0xfb, +@@ -38,11 +34,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, + 0xfffffffa, 0xfffffffb }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffc, + 0xfffffffffffffffd }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c +@@ -14,10 +14,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0xf6, 0xf7, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xffff, 0xfffe, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff8, 0xfffffff9 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf6, 0xf7, 0xf6, 0xf7, + 0xf6, 0xf7, 0xf6, 0xf7, + 0xfe, 0xff, 0xfe, 0xff, +@@ -38,11 +34,3 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff7, 0xfffffff7, + 0xfffffff7, 0xfffffff7 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, + 0xfffffffffffffff3 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c +@@ -11,16 +11,6 @@ void vqabs_extra(void); + VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 
0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, +@@ -28,25 +18,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected values of cumulative_saturation flag. 
*/ + int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c +@@ -39,10 +39,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x1, 0x2, 0x3, 0x4, + 0x5, 0x6, 0x7, 0x8, + 0x9, 0xa, 0xb, 0xc, +@@ -61,14 +57,6 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, + 0xffffffffffffffff }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; -@@ -80,7 +82,9 @@ - \ - ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ - ASM_OUTPUT_LABEL (FILE, NAME); \ -- fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \ -+ fprintf (FILE, "\t.space\t%d\n", SIZE ? (int) SIZE : 1); \ -+ fprintf (FILE, "\t.size\t%s, %d\n", \ -+ NAME, SIZE ? (int) SIZE : 1); \ - } \ - while (0) ---- a/src/gcc/config/glibc-stdint.h -+++ b/src/gcc/config/glibc-stdint.h -@@ -22,6 +22,12 @@ a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ + /* 64-bits types, with 0 as second input. */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovn.c +@@ -0,0 +1,134 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12, ++ 0x12, 0x12, 0x12, 0x12 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0x12345678, 0x12345678 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 }; ++ ++/* Expected values of cumulative_saturation flag when saturation occurs. 
*/ ++int VECT_VAR(expected_cumulative_sat1,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat1,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat1,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat1,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat1,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat1,uint,32,2) = 1; ++ ++/* Expected results when saturation occurs. */ ++VECT_VAR_DECL(expected1,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected1,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected1,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++#define INSN_NAME vqmovn ++#define TEST_MSG "VQMOVN" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN_NAME) ++{ ++ /* Basic test: y=OP(x), then store the result. */ ++#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ ++ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* No need for 64 bits variants. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); ++ ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ /* Fill input vector with arbitrary values. */ ++ VDUP(vector, q, int, s, 16, 8, 0x12); ++ VDUP(vector, q, int, s, 32, 4, 0x1278); ++ VDUP(vector, q, int, s, 64, 2, 0x12345678); ++ VDUP(vector, q, uint, u, 16, 8, 0x82); ++ VDUP(vector, q, uint, u, 32, 4, 0x8765); ++ VDUP(vector, q, uint, u, 64, 2, 0x87654321); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#define CMT "" ++ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ ++ ++ /* Fill input vector with arbitrary values which cause cumulative ++ saturation. 
*/ ++ VDUP(vector, q, int, s, 16, 8, 0x1234); ++ VDUP(vector, q, int, s, 32, 4, 0x12345678); ++ VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL); ++ VDUP(vector, q, uint, u, 16, 8, 0x8234); ++ VDUP(vector, q, uint, u, 32, 4, 0x87654321); ++ VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#undef CMT ++#define CMT " (with saturation)" ++ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat1, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat1, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected1, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqmovn (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovun.c +@@ -0,0 +1,93 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34, ++ 0x34, 0x34, 0x34, 0x34 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 }; ++ ++/* Expected values of cumulative_saturation flag with negative input. */ ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 1; ++ ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ ++#define INSN_NAME vqmovun ++#define TEST_MSG "VQMOVUN" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN_NAME) ++{ ++ /* Basic test: y=OP(x), then store the result. 
*/ ++#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ ++ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ /* Fill input vector with arbitrary values. */ ++ VDUP(vector, q, int, s, 16, 8, 0x34); ++ VDUP(vector, q, int, s, 32, 4, 0x5678); ++ VDUP(vector, q, int, s, 64, 2, 0x12345678); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#define CMT "" ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ ++ /* Fill input vector with negative values. */ ++ VDUP(vector, q, int, s, 16, 8, 0x8234); ++ VDUP(vector, q, int, s, 32, 4, 0x87654321); ++ VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); ++ ++ /* Apply a unary operator named INSN_NAME. */ ++#undef CMT ++#define CMT " (negative input)" ++ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqmovun (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c +@@ -11,16 +11,6 @@ void vqneg_extra(void); + VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; + VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; + VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9, + 0x8, 0x7, 0x6, 0x5, +@@ -28,25 +18,6 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, + VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, + 0xc, 0xb, 0xa, 0x9 }; + 
VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +-VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; -+/* Systems using musl libc should use this header and make sure -+ OPTION_MUSL is defined correctly before using the TYPE macros. */ -+#ifndef OPTION_MUSL -+#define OPTION_MUSL 0 -+#endif + /* Expected values of cumulative_saturation flag. */ + int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh.c +@@ -0,0 +1,161 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag when multiplication ++ saturates. */ ++int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++ ++/* Expected results when multiplication saturates. */ ++VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++/* Expected values of cumulative_saturation flag when rounding ++ should not cause saturation. */ ++int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++ ++/* Expected results when rounding should not cause saturation. 
*/ ++VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++#define INSN vqrdmulh ++#define TEST_MSG "VQRDMULH" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* vector_res = vqrdmulh(vector,vector2), then store the result. */ ++#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE(vector, int, 16, 4); ++ DECL_VARIABLE(vector, int, 32, 2); ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, int, 16, 8); ++ DECL_VARIABLE(vector_res, int, 32, 4); ++ ++ DECL_VARIABLE(vector2, int, 16, 4); ++ DECL_VARIABLE(vector2, int, 32, 2); ++ DECL_VARIABLE(vector2, int, 16, 8); ++ DECL_VARIABLE(vector2, int, 32, 4); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, , int, s, 16, 4); ++ VLOAD(vector, buffer, , int, s, 32, 2); ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ ++ /* Initialize vector2. */ ++ VDUP(vector2, , int, s, 16, 4, 0x5555); ++ VDUP(vector2, , int, s, 32, 2, 0xBB); ++ VDUP(vector2, q, int, s, 16, 8, 0x33); ++ VDUP(vector2, q, int, s, 32, 4, 0x22); ++ ++#define CMT "" ++ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ ++ /* Now use input values such that the multiplication causes ++ saturation. 
*/ ++#define TEST_MSG_MUL " (check mul cumulative saturation)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8000); ++ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ VDUP(vector2, q, int, s, 16, 8, 0x8000); ++ VDUP(vector2, q, int, s, 32, 4, 0x80000000); ++ ++ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ ++ /* Use input values where rounding produces a result equal to the ++ saturation value, but does not set the saturation flag. */ ++#define TEST_MSG_ROUND " (check rounding)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8001); ++ VDUP(vector2, , int, s, 32, 2, 0x80000001); ++ VDUP(vector2, q, int, s, 16, 8, 0x8001); ++ VDUP(vector2, q, int, s, 32, 4, 0x80000001); ++ ++ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++} ++ ++int main (void) ++{ ++ exec_vqrdmulh (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_lane.c +@@ -0,0 +1,169 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++ ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag when multiplication ++ saturates. */ ++int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++ ++/* Expected results when multiplication saturates. 
*/ ++VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++/* Expected values of cumulative_saturation flag when rounding ++ should not cause saturation. */ ++int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++ ++/* Expected results when rounding should not cause saturation. */ ++VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++#define INSN vqrdmulh ++#define TEST_MSG "VQRDMULH_LANE" ++ ++#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ ++#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N2), \ ++ L); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE(vector, int, 16, 4); ++ DECL_VARIABLE(vector, int, 32, 2); ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, int, 16, 8); ++ DECL_VARIABLE(vector_res, int, 32, 4); ++ ++ /* vector2: vqrdmulh_lane and vqrdmulhq_lane have a 2nd argument with ++ the same number of elements, so we need only one variable of each ++ type. */ ++ DECL_VARIABLE(vector2, int, 16, 4); ++ DECL_VARIABLE(vector2, int, 32, 2); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, , int, s, 16, 4); ++ VLOAD(vector, buffer, , int, s, 32, 2); ++ ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ ++ /* Initialize vector2. */ ++ VDUP(vector2, , int, s, 16, 4, 0x55); ++ VDUP(vector2, , int, s, 32, 2, 0xBB); ++ ++ /* Choose lane arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ ++ /* Now use input values such that the multiplication causes ++ saturation. */ ++#define TEST_MSG_MUL " (check mul cumulative saturation)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8000); ++ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ ++ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, , int, s, 16, 4, 0x8001); ++ VDUP(vector2, , int, s, 32, 2, 0x80000001); ++ ++ /* Use input values where rounding produces a result equal to the ++ saturation value, but does not set the saturation flag. */ ++#define TEST_MSG_ROUND " (check rounding)" ++ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++} ++ ++int main (void) ++{ ++ exec_vqrdmulh_lane (); ++ return 0; ++} ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_n.c +@@ -0,0 +1,155 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5, ++ 0x5, 0x4, 0x4, 0x4 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, ++ 0xfffffffe, 0xfffffffe }; ++ ++/* Expected values of cumulative_saturation flag when multiplication ++ saturates. */ ++int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++ ++/* Expected results when multiplication saturates. */ ++VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++/* Expected values of cumulative_saturation flag when rounding ++ should not cause saturation. */ ++int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++ ++/* Expected results when rounding should not cause saturation. */ ++VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++ ++#define INSN vqrdmulh ++#define TEST_MSG "VQRDMULH_N" ++ ++#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ int i; ++ ++ /* vector_res = vqrdmulh_n(vector,val), then store the result. */ ++#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ L); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE(vector, int, 16, 4); ++ DECL_VARIABLE(vector, int, 32, 2); ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, int, 16, 8); ++ DECL_VARIABLE(vector_res, int, 32, 4); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, , int, s, 16, 4); ++ VLOAD(vector, buffer, , int, s, 32, 2); ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ ++ /* Choose multiplier arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12, expected_cumulative_sat, CMT); ++ TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ ++ /* Now use input values such that the multiplication causes ++ saturation. */ ++#define TEST_MSG_MUL " (check mul cumulative saturation)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ ++ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000, expected_cumulative_sat_mul, ++ TEST_MSG_MUL); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ ++ /* Use input values where rounding produces a result equal to the ++ saturation value, but does not set the saturation flag. */ ++#define TEST_MSG_ROUND " (check rounding)" ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ ++ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001, expected_cumulative_sat_round, ++ TEST_MSG_ROUND); ++ ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++} ++ ++int main (void) ++{ ++ exec_vqrdmulh_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshl.c +@@ -0,0 +1,1090 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag with input=0. 
*/ ++int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++ ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with input=0 and ++ negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++ ++/* Expected results with input=0 and negative shift amount. 
*/ ++VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, ++ 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_saturation flag with negative shift ++ amount. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++ ++/* Expected results with negative shift amount. 
*/ ++VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, ++ 0xfd, 0xfd, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; ++VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, ++ 0x3d, 0x3d, 0x3e, 0x3e }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; ++VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; ++VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2 }; ++VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x80000, 0x80000, ++ 0x80000, 0x80000 }; ++VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; ++ ++/* Expected values of cumulative_saturation flag with input=max and ++ shift by -1. */ ++int VECT_VAR(expected_cumulative_sat_minus1,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus1,uint,64,2) = 0; ++ ++/* Expected results with input=max and shift by -1. 
*/ ++VECT_VAR_DECL(expected_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_minus1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_minus1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_minus1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_minus1,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_minus1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_minus1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, ++ 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_minus1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_minus1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_minus1,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, ++ 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_minus1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_minus1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++ ++/* Expected values of cumulative_saturation flag with input=max and ++ shift by -3. */ ++int VECT_VAR(expected_cumulative_sat_minus3,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_minus3,uint,64,2) = 0; ++ ++/* Expected results with input=max and shift by -3. 
*/ ++VECT_VAR_DECL(expected_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_minus3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_minus3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_minus3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_minus3,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_minus3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_minus3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, ++ 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_minus3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_minus3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_minus3,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, ++ 0x2000, 0x2000, 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_minus3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_minus3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected values of cumulative_saturation flag with input=max and ++ large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_large_sh,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,2) = 1; ++ ++/* Expected results with input=max and large shift amount. 
*/ ++VECT_VAR_DECL(expected_large_sh,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_large_sh,int,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_large_sh,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_large_sh,int,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_large_sh,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_large_sh,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_large_sh,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_large_sh,int,16,8) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_large_sh,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_large_sh,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_large_sh,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_large_sh,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_large_sh,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_saturation flag with negative input and ++ large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,2) = 1; ++ ++/* Expected results with negative input and large shift amount. 
*/ ++VECT_VAR_DECL(expected_neg_large_sh,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,32,2) [] = { 0xffffffff, ++ 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg_large_sh,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,int,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,32,4) [] = { 0xffffffff, ++ 0xffffffff, ++ 0xffffffff, ++ 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large_sh,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_saturation flag with max/min input and ++ large negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,2) = 0; ++ ++/* Expected results with max/min input and large negative shift amount. 
*/ ++VECT_VAR_DECL(expected_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with input=0 and ++ large negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,2) = 0; ++ ++/* Expected results with input=0 and large negative shift amount. 
*/ ++VECT_VAR_DECL(expected_0_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++ ++#define INSN vqrshl ++#define TEST_MSG "VQRSHL/VQRSHLQ" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: v3=vqrshl(v1,v2), then store the result. */ ++#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector_shift, T3, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); ++ ++ clean_results (); ++ ++ /* Fill input vector with 0, to check saturation on limits. */ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); ++ ++ /* Choose init value arbitrarily, will be used as shift amount */ ++ /* Use values equal to or one-less-than the type width to check ++ behaviour on limits. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 7); ++ VDUP(vector_shift, , int, s, 16, 4, 15); ++ VDUP(vector_shift, , int, s, 32, 2, 31); ++ VDUP(vector_shift, , int, s, 64, 1, 63); ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ ++#define CMT " (with input = 0)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++#undef CMT ++#define CMT " (input 0 and negative shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); ++ ++ ++ /* Test again, with predefined input values. */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose init value arbitrarily, will be used as shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 1); ++ VDUP(vector_shift, , int, s, 16, 4, 3); ++ VDUP(vector_shift, , int, s, 32, 2, 8); ++ VDUP(vector_shift, , int, s, 64, 1, 3); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 12); ++ VDUP(vector_shift, q, int, s, 32, 4, 31); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ ++#undef CMT ++#define CMT "" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -2); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++#undef CMT ++#define CMT " (negative shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ /* Use -1 shift amount to check cumulative saturation with ++ round_const. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -1); ++ VDUP(vector_shift, , int, s, 32, 2, -1); ++ VDUP(vector_shift, , int, s, 64, 1, -1); ++ VDUP(vector_shift, q, int, s, 8, 16, -1); ++ VDUP(vector_shift, q, int, s, 16, 8, -1); ++ VDUP(vector_shift, q, int, s, 32, 4, -1); ++ VDUP(vector_shift, q, int, s, 64, 2, -1); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: shift by -1)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus1, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus1, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus1, CMT); ++ ++ ++ /* Use -3 shift amount to check cumulative saturation with ++ round_const. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -3); ++ VDUP(vector_shift, , int, s, 16, 4, -3); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, -3); ++ VDUP(vector_shift, q, int, s, 16, 8, -3); ++ VDUP(vector_shift, q, int, s, 32, 4, -3); ++ VDUP(vector_shift, q, int, s, 64, 2, -3); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: shift by -3)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus3, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus3, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus3, CMT); ++ ++ ++ /* Use large shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 10); ++ VDUP(vector_shift, , int, s, 16, 4, 20); ++ VDUP(vector_shift, , int, s, 32, 2, 40); ++ VDUP(vector_shift, , int, s, 64, 1, 70); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 20); ++ VDUP(vector_shift, q, int, s, 32, 4, 40); ++ VDUP(vector_shift, q, int, s, 64, 2, 70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_sh, CMT); ++ ++ ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, , int, s, 8, 8, 0x80); ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); ++ VDUP(vector, q, int, s, 8, 16, 0x80); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ ++ /* Use large shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 10); ++ VDUP(vector_shift, , int, s, 16, 4, 20); ++ VDUP(vector_shift, , int, s, 32, 2, 40); ++ VDUP(vector_shift, , int, s, 64, 1, 70); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 20); ++ VDUP(vector_shift, q, int, s, 32, 4, 40); ++ VDUP(vector_shift, q, int, s, 64, 2, 70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large shift amount with negative input)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large_sh, CMT); ++ ++ ++ /* Fill input vector with negative and positive values, to check ++ * saturation on limits */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, int, s, 8, 16, 0x80); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ ++ /* Use large negative shift amount */ ++ VDUP(vector_shift, , int, s, 8, 8, -10); ++ VDUP(vector_shift, , int, s, 16, 4, -20); ++ VDUP(vector_shift, , int, s, 32, 2, -40); ++ VDUP(vector_shift, , int, s, 64, 
1, -70); ++ VDUP(vector_shift, q, int, s, 8, 16, -10); ++ VDUP(vector_shift, q, int, s, 16, 8, -20); ++ VDUP(vector_shift, q, int, s, 32, 4, -40); ++ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large negative shift amount)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ ++ ++ /* Fill input vector with 0, to check saturation in case of large ++ * shift amount */ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ ++ /* Use large shift amount */ ++ VDUP(vector_shift, , int, s, 8, 8, -10); ++ VDUP(vector_shift, , int, s, 16, 4, -20); ++ VDUP(vector_shift, , int, s, 32, 2, -40); ++ VDUP(vector_shift, , int, s, 64, 1, -70); ++ VDUP(vector_shift, q, int, s, 8, 16, -10); ++ VDUP(vector_shift, q, int, s, 16, 8, -20); ++ VDUP(vector_shift, q, int, s, 32, 4, -40); ++ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ ++#undef CMT ++#define CMT " (checking cumulative saturation: large 
negative shift amount with 0 input)" ++ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); ++ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqrshl (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_n.c +@@ -0,0 +1,174 @@ ++#include <arm_neon.h> ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results.
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++/* Expected values of cumulative_saturation flag with shift by 3. */ ++int VECT_VAR(expected_cumulative_sat_sh3,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh3,uint,64,2) = 1; ++ ++/* Expected results with shift by 3. */ ++VECT_VAR_DECL(expected_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_sh3,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++/* Expected values of cumulative_saturation flag with shift by max ++ amount. */ ++int VECT_VAR(expected_cumulative_sat_shmax,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_shmax,uint,64,2) = 1; ++ ++/* Expected results with shift by max amount. */ ++VECT_VAR_DECL(expected_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_shmax,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_shmax,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ ++#define INSN vqrshrn_n ++#define TEST_MSG "VQRSHRN_N" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: y=vqrshrn_n(x,v), then store the result. */ ++#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ ++ VECT_VAR(vector_res, T1, W2, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ /* vector is twice as large as vector_res. 
*/ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); ++ ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); ++ ++ /* Choose shift amount arbitrarily. */ ++#define CMT "" ++ TEST_VQRSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ ++ ++ /* Another set of tests, shifting max value by 3. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++#undef CMT ++#define CMT " (check saturation: shift by 3)" ++ TEST_VQRSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); ++ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh3, CMT); ++ ++ ++ /* Shift by max amount. 
*/ ++#undef CMT ++#define CMT " (check saturation: shift by max)" ++ TEST_VQRSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); ++ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_shmax, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqrshrn_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_n.c +@@ -0,0 +1,189 @@ ++#include <arm_neon.h> ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag with negative unput. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; ++ ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by 1. */ ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++ ++/* Expected results with max input value shifted by 1. */ ++VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++ ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by max amount. */ ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; ++ ++/* Expected results with max input value shifted by max amount. */ ++VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++ ++/* Expected values of cumulative_saturation flag with min input value ++ shifted by max amount. */ ++int VECT_VAR(expected_cumulative_sat_min_shmax,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_min_shmax,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_min_shmax,int,64,2) = 1; ++ ++/* Expected results with min input value shifted by max amount.
*/ ++VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with inputs in usual ++ range. */ ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++ ++/* Expected results with inputs in usual range. */ ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49, ++ 0x49, 0x49, 0x49, 0x49 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf }; ++ ++#define INSN vqrshrun_n ++#define TEST_MSG "VQRSHRUN_N" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: y=vqrshrun_n(x,v), then store the result. */ ++#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ ++ VECT_VAR(vector_res, uint, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ ++ VECT_VAR(vector_res, uint, W2, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ /* vector is twice as large as vector_res. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ ++ clean_results (); ++ ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, -2); ++ VDUP(vector, q, int, s, 32, 4, -3); ++ VDUP(vector, q, int, s, 64, 2, -4); ++ ++ /* Choose shift amount arbitrarily. */ ++#define CMT " (negative input)" ++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ ++ /* shift by 1. 
*/
++#undef CMT
++#define CMT " (check cumulative saturation: shift by 1)"
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT);
++
++
++ /* shift by max. */
++#undef CMT
++#define CMT " (check cumulative saturation: shift by max, positive input)"
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT);
++
++
++ /* Fill input vector with min value, to check saturation on limits. */
++ VDUP(vector, q, int, s, 16, 8, 0x8000);
++ VDUP(vector, q, int, s, 32, 4, 0x80000000);
++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
++
++ /* shift by max */
++#undef CMT
++#define CMT " (check cumulative saturation: shift by max, negative input)"
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_min_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_min_shmax, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_min_shmax, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT);
++
++
++ /* Fill input vector with positive values, to check normal case. */
++ VDUP(vector, q, int, s, 16, 8, 0x1234);
++ VDUP(vector, q, int, s, 32, 4, 0x87654321);
++ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF);
++
++ /* shift arbitrary amount. */
++#undef CMT
++#define CMT ""
++ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT);
++ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT);
++ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT);
++
++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
++}
++
++int main (void)
++{
++ exec_vqrshrun_n ();
++ return 0;
++}
+--- a/src//dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl.c
+@@ -0,0 +1,829 @@
++#include <arm_neon.h>
++#include "arm-neon-ref.h"
++#include "compute-ref-data.h"
++
++/* Expected values of cumulative_saturation flag with input=0.
*/ ++int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++ ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with input=0 and ++ negative shift amount. */ ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++ ++/* Expected results with input=0 and negative shift amount. 
*/ ++VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, ++ 0x8000, 0x8000, 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with negative shift ++ amount. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++ ++/* Expected results with negative shift amount. 
*/ ++VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc }; ++VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, ++ 0x7a, 0x7a, 0x7b, 0x7b }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; ++VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; ++VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f, ++ 0x1f, 0x1f, 0x1f, 0x1f }; ++VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x7ffff, 0x7ffff, ++ 0x7ffff, 0x7ffff }; ++VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with negative ++ input and large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_neg_large,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,2) = 1; ++ ++/* Expected results with negative input and large shift amount. 
*/ ++VECT_VAR_DECL(expected_neg_large,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large,int,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large,int,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large,int,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_neg_large,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_neg_large,int,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_neg_large,int,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_neg_large,int,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++VECT_VAR_DECL(expected_neg_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_neg_large,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_neg_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_neg_large,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with max input ++ and shift by -1. */ ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,2) = 0; ++ ++/* Expected results with max input and shift by -1. 
*/ ++VECT_VAR_DECL(expected_max_minus1,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f }; ++VECT_VAR_DECL(expected_max_minus1,int,16,4) [] = { 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff }; ++VECT_VAR_DECL(expected_max_minus1,int,32,2) [] = { 0x3fffffff, 0x3fffffff }; ++VECT_VAR_DECL(expected_max_minus1,int,64,1) [] = { 0x3fffffffffffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_minus1,uint,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_minus1,uint,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max_minus1,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x3f, 0x3f }; ++VECT_VAR_DECL(expected_max_minus1,int,16,8) [] = { 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff, ++ 0x3fff, 0x3fff }; ++VECT_VAR_DECL(expected_max_minus1,int,32,4) [] = { 0x3fffffff, 0x3fffffff, ++ 0x3fffffff, 0x3fffffff }; ++VECT_VAR_DECL(expected_max_minus1,int,64,2) [] = { 0x3fffffffffffffff, ++ 0x3fffffffffffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_minus1,uint,16,8) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_minus1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_minus1,uint,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with max input ++ and large shift amount. */ ++int VECT_VAR(expected_cumulative_sat_max_large,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_large,uint,64,2) = 1; ++ ++/* Expected results with max input and large shift amount. 
*/ ++VECT_VAR_DECL(expected_max_large,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_large,int,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_large,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_large,int,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_large,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_max_large,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_large,int,16,8) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_large,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_large,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_large,uint,16,8) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_large,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; ++ ++/* Expected values of cumulative_sat_saturation flag with saturation ++ on 64-bits values. */ ++int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 1; ++ ++/* Expected results with saturation on 64-bits values.. */ ++VECT_VAR_DECL(expected_64,int,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_64,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++ ++#define INSN vqshl ++#define TEST_MSG "VQSHL/VQSHLQ" ++ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) ++{ ++ /* Basic test: v3=vqshl(v1,v2), then store the result. */ ++#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector_shift, T3, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); ++ ++ clean_results (); ++ ++ /* Fill input vector with 0, to check saturation on limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); ++ ++ /* Choose init value arbitrarily, will be used as shift amount */ ++ /* Use values equal or one-less-than the type width to check ++ behaviour on limits. */ ++ ++ /* 64-bits vectors first. */ ++ /* Shift 8-bits lanes by 7... */ ++ VDUP(vector_shift, , int, s, 8, 8, 7); ++ /* ... except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */ ++ VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6); ++ VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8); ++ VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9); ++ ++ /* Shift 16-bits lanes by 15... */ ++ VDUP(vector_shift, , int, s, 16, 4, 15); ++ /* ... except: lane 0 (by 14), lane 1 (by 16), and lane 2 (by 17). */ ++ VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14); ++ VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16); ++ VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17); ++ ++ /* Shift 32-bits lanes by 31... */ ++ VDUP(vector_shift, , int, s, 32, 2, 31); ++ /* ... except lane 1 (by 30). */ ++ VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30); ++ ++ /* Shift 64 bits lane by 63. */ ++ VDUP(vector_shift, , int, s, 64, 1, 63); ++ ++ /* 128-bits vectors. */ ++ /* Shift 8-bits lanes by 8. */ ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ /* Shift 16-bits lanes by 16. */ ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ /* Shift 32-bits lanes by 32... */ ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ /* ... except lane 1 (by 33). */ ++ VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33); ++ ++ /* Shift 64-bits lanes by 64... */ ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ /* ... except lane 1 (by 62). 
*/ ++ VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); ++ ++#define CMT " (with input = 0)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ ++ ++ /* Use negative shift amounts */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++#undef CMT ++#define CMT " (input 0 and negative shift amount)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); 
++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); + - #define SIG_ATOMIC_TYPE "int" - - #define INT8_TYPE "signed char" -@@ -43,12 +49,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") - - #define INT_FAST8_TYPE "signed char" --#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") --#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") -+#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") -+#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int") - #define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int") - #define UINT_FAST8_TYPE "unsigned char" --#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") --#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int") -+#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") -+#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int") - #define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") - - #define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int") ---- a/src/gcc/config/linux.h -+++ b/src/gcc/config/linux.h -@@ -32,10 +32,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) - #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) - #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) - #else - #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) - #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) - #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (linux_libc == LIBC_MUSL) - #endif - - #define GNU_USER_TARGET_OS_CPP_BUILTINS() \ -@@ -50,21 +54,25 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - } while (0) - - /* Determine which dynamic linker to use depending on whether GLIBC or -- uClibc or Bionic is the default C library and whether -- -muclibc or -mglibc or -mbionic has been passed to change the default. 
*/ -+ uClibc or Bionic or musl is the default C library and whether -+ -muclibc or -mglibc or -mbionic or -mmusl has been passed to change -+ the default. */ - --#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LD1, LD2, LD3) \ -- "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:" LD1 "}}" -+#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LIBC4, LD1, LD2, LD3, LD4) \ -+ "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:%{" LIBC4 ":" LD4 ";:" LD1 "}}}" - - #if DEFAULT_LIBC == LIBC_GLIBC --#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ -- CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", G, U, B) -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", "mmusl", G, U, B, M) - #elif DEFAULT_LIBC == LIBC_UCLIBC --#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ -- CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", U, G, B) -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", "mmusl", U, G, B, M) - #elif DEFAULT_LIBC == LIBC_BIONIC --#define CHOOSE_DYNAMIC_LINKER(G, U, B) \ -- CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", B, G, U) -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", "mmusl", B, G, U, M) -+#elif DEFAULT_LIBC == LIBC_MUSL -+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \ -+ CHOOSE_DYNAMIC_LINKER1 ("mmusl", "mglibc", "muclibc", "mbionic", M, G, U, B) - #else - #error "Unsupported DEFAULT_LIBC" - #endif /* DEFAULT_LIBC */ -@@ -81,24 +89,100 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker" - #define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64" - #define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32" -+/* Should be redefined for each target that supports musl. */ -+#define MUSL_DYNAMIC_LINKER "/dev/null" -+#define MUSL_DYNAMIC_LINKER32 "/dev/null" -+#define MUSL_DYNAMIC_LINKER64 "/dev/null" -+#define MUSL_DYNAMIC_LINKERX32 "/dev/null" - - #define GNU_USER_DYNAMIC_LINKER \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \ -- BIONIC_DYNAMIC_LINKER) -+ BIONIC_DYNAMIC_LINKER, MUSL_DYNAMIC_LINKER) - #define GNU_USER_DYNAMIC_LINKER32 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \ -- BIONIC_DYNAMIC_LINKER32) -+ BIONIC_DYNAMIC_LINKER32, MUSL_DYNAMIC_LINKER32) - #define GNU_USER_DYNAMIC_LINKER64 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \ -- BIONIC_DYNAMIC_LINKER64) -+ BIONIC_DYNAMIC_LINKER64, MUSL_DYNAMIC_LINKER64) - #define GNU_USER_DYNAMIC_LINKERX32 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \ -- BIONIC_DYNAMIC_LINKERX32) -+ BIONIC_DYNAMIC_LINKERX32, MUSL_DYNAMIC_LINKERX32) - - /* Whether we have Bionic libc runtime */ - #undef TARGET_HAS_BIONIC - #define TARGET_HAS_BIONIC (OPTION_BIONIC) - -+/* musl avoids problematic includes by rearranging the include directories. -+ * Unfortunately, this is mostly duplicated from cppdefault.c */ -+#if DEFAULT_LIBC == LIBC_MUSL -+#define INCLUDE_DEFAULTS_MUSL_GPP \ -+ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, \ -+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, \ -+ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, \ -+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 }, \ -+ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, \ -+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, ++ /* Test again, with predefined input values. 
*/ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+#ifdef LOCAL_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_LOCAL \ -+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, \ -+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 }, -+#else -+#define INCLUDE_DEFAULTS_MUSL_LOCAL -+#endif ++ /* Choose init value arbitrarily, will be used as shift amount. */ ++ VDUP(vector_shift, , int, s, 8, 8, 1); ++ VDUP(vector_shift, , int, s, 16, 4, 3); ++ VDUP(vector_shift, , int, s, 32, 2, 8); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 12); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); + -+#ifdef PREFIX_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_PREFIX \ -+ { PREFIX_INCLUDE_DIR, 0, 0, 1, 0, 0}, -+#else -+#define INCLUDE_DEFAULTS_MUSL_PREFIX -+#endif ++#undef CMT ++#define CMT "" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); + -+#ifdef CROSS_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_CROSS \ -+ { CROSS_INCLUDE_DIR, "GCC", 0, 0, 0, 0}, -+#else -+#define INCLUDE_DEFAULTS_MUSL_CROSS -+#endif ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); + -+#ifdef TOOL_INCLUDE_DIR -+#define INCLUDE_DEFAULTS_MUSL_TOOL \ -+ { TOOL_INCLUDE_DIR, "BINUTILS", 0, 1, 0, 0}, -+#else -+#define INCLUDE_DEFAULTS_MUSL_TOOL -+#endif + -+#ifdef NATIVE_SYSTEM_HEADER_DIR -+#define INCLUDE_DEFAULTS_MUSL_NATIVE \ -+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 2 }, \ -+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 0 }, -+#else -+#define INCLUDE_DEFAULTS_MUSL_NATIVE -+#endif ++ /* Use negative shift amounts */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ 
VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); + -+#if defined (CROSS_DIRECTORY_STRUCTURE) && !defined (TARGET_SYSTEM_ROOT) -+# undef INCLUDE_DEFAULTS_MUSL_LOCAL -+# define INCLUDE_DEFAULTS_MUSL_LOCAL -+# undef INCLUDE_DEFAULTS_MUSL_NATIVE -+# define INCLUDE_DEFAULTS_MUSL_NATIVE -+#else -+# undef INCLUDE_DEFAULTS_MUSL_CROSS -+# define INCLUDE_DEFAULTS_MUSL_CROSS -+#endif ++#undef CMT ++#define CMT " (negative shift amount)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); + -+#undef INCLUDE_DEFAULTS -+#define INCLUDE_DEFAULTS \ -+ { \ -+ INCLUDE_DEFAULTS_MUSL_GPP \ -+ INCLUDE_DEFAULTS_MUSL_PREFIX \ -+ INCLUDE_DEFAULTS_MUSL_CROSS \ -+ INCLUDE_DEFAULTS_MUSL_TOOL \ -+ INCLUDE_DEFAULTS_MUSL_NATIVE \ -+ { GCC_INCLUDE_DIR, "GCC", 0, 1, 0, 0 }, \ -+ { 0, 0, 0, 0, 0, 0 } \ -+ } -+#endif + - #if (DEFAULT_LIBC == LIBC_UCLIBC) && defined (SINGLE_LIBC) /* uClinux */ - /* This is a *uclinux* target. 
We don't define below macros to normal linux - versions, because doing so would require *uclinux* targets to include ---- a/src/gcc/config/linux.opt -+++ b/src/gcc/config/linux.opt -@@ -28,5 +28,9 @@ Target Report RejectNegative Var(linux_libc,LIBC_GLIBC) Negative(muclibc) - Use GNU C library - - muclibc --Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mbionic) -+Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mmusl) - Use uClibc C library ++ /* Use large shift amounts. */ ++ VDUP(vector_shift, , int, s, 8, 8, 8); ++ VDUP(vector_shift, , int, s, 16, 4, 16); ++ VDUP(vector_shift, , int, s, 32, 2, 32); ++ VDUP(vector_shift, , int, s, 64, 1, 64); ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); + -+mmusl -+Target Report RejectNegative Var(linux_libc,LIBC_MUSL) Negative(mbionic) -+Use musl C library ---- a/src/gcc/config/mips/linux.h -+++ b/src/gcc/config/mips/linux.h -@@ -37,7 +37,13 @@ along with GCC; see the file COPYING3. If not see - #define UCLIBC_DYNAMIC_LINKERN32 \ - "%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}" - -+#undef MUSL_DYNAMIC_LINKER32 -+#define MUSL_DYNAMIC_LINKER32 "/lib/ld-musl-mips%{EL:el}%{msoft-float:-sf}.so.1" -+#undef MUSL_DYNAMIC_LINKER64 -+#define MUSL_DYNAMIC_LINKER64 "/lib/ld-musl-mips64%{EL:el}%{msoft-float:-sf}.so.1" -+#define MUSL_DYNAMIC_LINKERN32 "/lib/ld-musl-mipsn32%{EL:el}%{msoft-float:-sf}.so.1" ++#undef CMT ++#define CMT " (large shift amount, negative input)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large, CMT); + - #define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32" - #define GNU_USER_DYNAMIC_LINKERN32 \ - CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \ -- BIONIC_DYNAMIC_LINKERN32) -+ BIONIC_DYNAMIC_LINKERN32, MUSL_DYNAMIC_LINKERN32) ---- a/src/gcc/config/rs6000/linux.h -+++ b/src/gcc/config/rs6000/linux.h -@@ -30,10 +30,14 @@ - #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) - #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) - #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) - #else - #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) - #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) - 
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (linux_libc == LIBC_MUSL) - #endif - - /* Determine what functions are present at the runtime; ---- a/src/gcc/config/rs6000/linux64.h -+++ b/src/gcc/config/rs6000/linux64.h -@@ -299,10 +299,14 @@ extern int dot_symbols; - #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) - #define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) - #define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) - #else - #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) - #define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) - #define OPTION_BIONIC (linux_libc == LIBC_BIONIC) -+#undef OPTION_MUSL -+#define OPTION_MUSL (linux_libc == LIBC_MUSL) - #endif - - /* Determine what functions are present at the runtime; ---- a/src/gcc/configure -+++ b/src/gcc/configure -@@ -1699,7 +1699,8 @@ Optional Packages: - use sysroot as the system root during the build - --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR - --with-specs=SPECS add SPECS to driver command-line processing -- --with-pkgversion=PKG Use PKG in the version string in place of "GCC" -+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro -+ GCC `cat $srcdir/LINARO-VERSION`" - --with-bugurl=URL Direct users to URL to report a bug - --with-multilib-list select multilibs (AArch64, SH and x86-64 only) - --with-gnu-ld assume the C compiler uses GNU ld default=no -@@ -7362,7 +7363,7 @@ if test "${with_pkgversion+set}" = set; then : - *) PKGVERSION="($withval) " ;; - esac - else -- PKGVERSION="(GCC) " -+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " - - fi - -@@ -18162,7 +18163,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18165 "configure" -+#line 18166 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -18268,7 +18269,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18271 "configure" -+#line 18272 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -27742,6 +27743,9 @@ if test "${gcc_cv_libc_provides_ssp+set}" = set; then : - else - gcc_cv_libc_provides_ssp=no - case "$target" in -+ *-*-musl*) -+ # All versions of musl provide stack protector -+ gcc_cv_libc_provides_ssp=yes;; - *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) - # glibc 2.4 and later provides __stack_chk_fail and - # either __stack_chk_guard, or TLS access to stack guard canary. -@@ -27774,6 +27778,7 @@ fi - # ) and for now - # simply assert that glibc does provide this, which is true for all - # realistically usable GNU/Hurd configurations. 
-+ # All supported versions of musl provide it as well - gcc_cv_libc_provides_ssp=yes;; - *-*-darwin* | *-*-freebsd*) - ac_fn_c_check_func "$LINENO" "__stack_chk_fail" "ac_cv_func___stack_chk_fail" -@@ -27870,6 +27875,9 @@ case "$target" in - gcc_cv_target_dl_iterate_phdr=no - fi - ;; -+ *-linux-musl*) -+ gcc_cv_target_dl_iterate_phdr=yes -+ ;; - esac - - if test x$gcc_cv_target_dl_iterate_phdr = xyes; then ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -862,7 +862,7 @@ AC_ARG_WITH(specs, - ) - AC_SUBST(CONFIGURE_SPECS) - --ACX_PKGVERSION([GCC]) -+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) - ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - - # Sanity check enable_languages in case someone does not run the toplevel -@@ -5229,6 +5229,9 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, - gcc_cv_libc_provides_ssp, - [gcc_cv_libc_provides_ssp=no - case "$target" in -+ *-*-musl*) -+ # All versions of musl provide stack protector -+ gcc_cv_libc_provides_ssp=yes;; - *-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu) - # glibc 2.4 and later provides __stack_chk_fail and - # either __stack_chk_guard, or TLS access to stack guard canary. -@@ -5255,6 +5258,7 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library, - # ) and for now - # simply assert that glibc does provide this, which is true for all - # realistically usable GNU/Hurd configurations. -+ # All supported versions of musl provide it as well - gcc_cv_libc_provides_ssp=yes;; - *-*-darwin* | *-*-freebsd*) - AC_CHECK_FUNC(__stack_chk_fail,[gcc_cv_libc_provides_ssp=yes], -@@ -5328,6 +5332,9 @@ case "$target" in - gcc_cv_target_dl_iterate_phdr=no - fi - ;; -+ *-linux-musl*) -+ gcc_cv_target_dl_iterate_phdr=yes -+ ;; - esac - GCC_TARGET_TEMPLATE([TARGET_DL_ITERATE_PHDR]) - if test x$gcc_cv_target_dl_iterate_phdr = xyes; then ---- a/src/gcc/cp/Make-lang.in -+++ b/src/gcc/cp/Make-lang.in -@@ -155,7 +155,7 @@ check-c++-subtargets : check-g++-subtargets - # List of targets that can use the generic check- rule and its // variant. - lang_checks += check-g++ - lang_checks_parallelized += check-g++ --# For description see comment above check_gcc_parallelize in gcc/Makefile.in. -+# For description see the check_$lang_parallelize comment in gcc/Makefile.in. - check_g++_parallelize = 10000 - # - # Install hooks: -@@ -221,6 +221,7 @@ c++.mostlyclean: - -rm -f doc/g++.1 - -rm -f cp/*$(objext) - -rm -f cp/*$(coverageexts) -+ -rm -f xg++$(exeext) g++-cross$(exeext) cc1plus$(exeext) - c++.clean: - c++.distclean: - -rm -f cp/config.status cp/Makefile ---- a/src/gcc/cppbuiltin.c -+++ b/src/gcc/cppbuiltin.c -@@ -62,18 +62,41 @@ parse_basever (int *major, int *minor, int *patchlevel) - *patchlevel = s_patchlevel; - } - -+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]" -+ to create Linaro release number YYYYMM and spin version. 
*/ -+static void -+parse_linarover (int *release, int *spin) -+{ -+ static int s_year = -1, s_month, s_spin; ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on limits */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + -+ if (s_year == -1) -+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3) -+ { -+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month); -+ s_spin = 0; -+ } ++ /* Shift by -1 */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -1); ++ VDUP(vector_shift, , int, s, 32, 2, -1); ++ VDUP(vector_shift, , int, s, 64, 1, -1); ++ VDUP(vector_shift, q, int, s, 8, 16, -1); ++ VDUP(vector_shift, q, int, s, 16, 8, -1); ++ VDUP(vector_shift, q, int, s, 32, 4, -1); ++ VDUP(vector_shift, q, int, s, 64, 2, -1); + -+ if (release) -+ *release = s_year * 100 + s_month; ++#undef CMT ++#define CMT " (max input, shift by -1)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, 
expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_minus1, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_minus1, CMT); + -+ if (spin) -+ *spin = s_spin; -+} - - /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */ - static void - define__GNUC__ (cpp_reader *pfile) - { -- int major, minor, patchlevel; -+ int major, minor, patchlevel, linaro_release, linaro_spin; - - parse_basever (&major, &minor, &patchlevel); -+ parse_linarover (&linaro_release, &linaro_spin); - cpp_define_formatted (pfile, "__GNUC__=%d", major); - cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); - cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); - cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); -+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release); -+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin); - cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); - cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); - cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); ---- a/src/gcc/cprop.c -+++ b/src/gcc/cprop.c -@@ -285,6 +285,15 @@ cprop_constant_p (const_rtx x) - return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x)); - } - -+/* Determine whether the rtx X should be treated as a register that can -+ be propagated. Any pseudo-register is fine. */ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_minus1, CMT); + -+static bool -+cprop_reg_p (const_rtx x) -+{ -+ return REG_P (x) && !HARD_REGISTER_P (x); -+} + - /* Scan SET present in INSN and add an entry to the hash TABLE. - IMPLICIT is true if it's an implicit set, false otherwise. */ - -@@ -295,8 +304,7 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, - rtx src = SET_SRC (set); - rtx dest = SET_DEST (set); - -- if (REG_P (dest) -- && ! HARD_REGISTER_P (dest) -+ if (cprop_reg_p (dest) - && reg_available_p (dest, insn) - && can_copy_p (GET_MODE (dest))) - { -@@ -321,9 +329,8 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table, - src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src); - - /* Record sets for constant/copy propagation. */ -- if ((REG_P (src) -+ if ((cprop_reg_p (src) - && src != dest -- && ! 
HARD_REGISTER_P (src) - && reg_available_p (src, insn)) - || cprop_constant_p (src)) - insert_set_in_table (dest, src, insn, table, implicit); -@@ -821,15 +828,15 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) - return success; - } - --/* Find a set of REGNOs that are available on entry to INSN's block. Return -- NULL no such set is found. */ -+/* Find a set of REGNOs that are available on entry to INSN's block. If found, -+ SET_RET[0] will be assigned a set with a register source and SET_RET[1] a -+ set with a constant source. If not found the corresponding entry is set to -+ NULL. */ - --static struct cprop_expr * --find_avail_set (int regno, rtx_insn *insn) -+static void -+find_avail_set (int regno, rtx_insn *insn, struct cprop_expr *set_ret[2]) - { -- /* SET1 contains the last set found that can be returned to the caller for -- use in a substitution. */ -- struct cprop_expr *set1 = 0; -+ set_ret[0] = set_ret[1] = NULL; - - /* Loops are not possible here. To get a loop we would need two sets - available at the start of the block containing INSN. i.e. we would -@@ -869,8 +876,10 @@ find_avail_set (int regno, rtx_insn *insn) - If the source operand changed, we may still use it for the next - iteration of this loop, but we may not use it for substitutions. */ - -- if (cprop_constant_p (src) || reg_not_set_p (src, insn)) -- set1 = set; -+ if (cprop_constant_p (src)) -+ set_ret[1] = set; -+ else if (reg_not_set_p (src, insn)) -+ set_ret[0] = set; - - /* If the source of the set is anything except a register, then - we have reached the end of the copy chain. */ -@@ -881,10 +890,6 @@ find_avail_set (int regno, rtx_insn *insn) - and see if we have an available copy into SRC. */ - regno = REGNO (src); - } -- -- /* SET1 holds the last set that was available and anticipatable at -- INSN. */ -- return set1; - } - - /* Subroutine of cprop_insn that tries to propagate constants into -@@ -1050,40 +1055,40 @@ cprop_insn (rtx_insn *insn) - int changed = 0, changed_this_round; - rtx note; - --retry: -- changed_this_round = 0; -- reg_use_count = 0; -- note_uses (&PATTERN (insn), find_used_regs, NULL); -- -- /* We may win even when propagating constants into notes. */ -- note = find_reg_equal_equiv_note (insn); -- if (note) -- find_used_regs (&XEXP (note, 0), NULL); -- -- for (i = 0; i < reg_use_count; i++) -+ do - { -- rtx reg_used = reg_use_table[i]; -- unsigned int regno = REGNO (reg_used); -- rtx src; -- struct cprop_expr *set; -+ changed_this_round = 0; -+ reg_use_count = 0; -+ note_uses (&PATTERN (insn), find_used_regs, NULL); - -- /* If the register has already been set in this block, there's -- nothing we can do. */ -- if (! reg_not_set_p (reg_used, insn)) -- continue; -+ /* We may win even when propagating constants into notes. */ -+ note = find_reg_equal_equiv_note (insn); -+ if (note) -+ find_used_regs (&XEXP (note, 0), NULL); - -- /* Find an assignment that sets reg_used and is available -- at the start of the block. */ -- set = find_avail_set (regno, insn); -- if (! set) -- continue; -+ for (i = 0; i < reg_use_count; i++) -+ { -+ rtx reg_used = reg_use_table[i]; -+ unsigned int regno = REGNO (reg_used); -+ rtx src_cst = NULL, src_reg = NULL; -+ struct cprop_expr *set[2]; - -- src = set->src; -+ /* If the register has already been set in this block, there's -+ nothing we can do. */ -+ if (! reg_not_set_p (reg_used, insn)) -+ continue; - -- /* Constant propagation. 
*/ -- if (cprop_constant_p (src)) -- { -- if (constprop_register (reg_used, src, insn)) -+ /* Find an assignment that sets reg_used and is available -+ at the start of the block. */ -+ find_avail_set (regno, insn, set); -+ if (set[0]) -+ src_reg = set[0]->src; -+ if (set[1]) -+ src_cst = set[1]->src; ++ /* Use large shift amounts */ ++ VDUP(vector_shift, , int, s, 8, 8, 8); ++ VDUP(vector_shift, , int, s, 16, 4, 16); ++ VDUP(vector_shift, , int, s, 32, 2, 32); ++ VDUP(vector_shift, , int, s, 64, 1, 64); ++ VDUP(vector_shift, q, int, s, 8, 16, 8); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ ++#undef CMT ++#define CMT " (max input, large shift amount)" ++ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_large, CMT); ++ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_large, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_large, CMT); + -+ /* Constant propagation. 
*/ -+ if (src_cst && cprop_constant_p (src_cst) -+ && constprop_register (reg_used, src_cst, insn)) - { - changed_this_round = changed = 1; - global_const_prop_count++; -@@ -1093,18 +1098,16 @@ retry: - "GLOBAL CONST-PROP: Replacing reg %d in ", regno); - fprintf (dump_file, "insn %d with constant ", - INSN_UID (insn)); -- print_rtl (dump_file, src); -+ print_rtl (dump_file, src_cst); - fprintf (dump_file, "\n"); - } - if (insn->deleted ()) - return 1; - } -- } -- else if (REG_P (src) -- && REGNO (src) >= FIRST_PSEUDO_REGISTER -- && REGNO (src) != regno) -- { -- if (try_replace_reg (reg_used, src, insn)) -+ /* Copy propagation. */ -+ else if (src_reg && cprop_reg_p (src_reg) -+ && REGNO (src_reg) != regno -+ && try_replace_reg (reg_used, src_reg, insn)) - { - changed_this_round = changed = 1; - global_copy_prop_count++; -@@ -1113,7 +1116,7 @@ retry: - fprintf (dump_file, - "GLOBAL COPY-PROP: Replacing reg %d in insn %d", - regno, INSN_UID (insn)); -- fprintf (dump_file, " with reg %d\n", REGNO (src)); -+ fprintf (dump_file, " with reg %d\n", REGNO (src_reg)); - } - - /* The original insn setting reg_used may or may not now be -@@ -1123,12 +1126,10 @@ retry: - and made things worse. */ - } - } -- -- /* If try_replace_reg simplified the insn, the regs found -- by find_used_regs may not be valid anymore. Start over. */ -- if (changed_this_round) -- goto retry; - } -+ /* If try_replace_reg simplified the insn, the regs found by find_used_regs -+ may not be valid anymore. Start over. */ -+ while (changed_this_round); - - if (changed && DEBUG_INSN_P (insn)) - return 0; -@@ -1191,7 +1192,7 @@ do_local_cprop (rtx x, rtx_insn *insn) - /* Rule out USE instructions and ASM statements as we don't want to - change the hard registers mentioned. */ - if (REG_P (x) -- && (REGNO (x) >= FIRST_PSEUDO_REGISTER -+ && (cprop_reg_p (x) - || (GET_CODE (PATTERN (insn)) != USE - && asm_noperands (PATTERN (insn)) < 0))) - { -@@ -1207,7 +1208,7 @@ do_local_cprop (rtx x, rtx_insn *insn) - - if (cprop_constant_p (this_rtx)) - newcnst = this_rtx; -- if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER -+ if (cprop_reg_p (this_rtx) - /* Don't copy propagate if it has attached REG_EQUIV note. - At this point this only function parameters should have - REG_EQUIV notes and if the argument slot is used somewhere -@@ -1328,9 +1329,8 @@ implicit_set_cond_p (const_rtx cond) - if (GET_CODE (cond) != EQ && GET_CODE (cond) != NE) - return false; - -- /* The first operand of COND must be a pseudo-reg. */ -- if (! REG_P (XEXP (cond, 0)) -- || HARD_REGISTER_P (XEXP (cond, 0))) -+ /* The first operand of COND must be a register we can propagate. */ -+ if (!cprop_reg_p (XEXP (cond, 0))) - return false; - - /* The second operand of COND must be a suitable constant. */ ---- a/src/gcc/df-core.c -+++ b/src/gcc/df-core.c -@@ -642,7 +642,6 @@ void - df_finish_pass (bool verify ATTRIBUTE_UNUSED) - { - int i; -- int removed = 0; - - #ifdef ENABLE_DF_CHECKING - int saved_flags; -@@ -658,21 +657,15 @@ df_finish_pass (bool verify ATTRIBUTE_UNUSED) - saved_flags = df->changeable_flags; - #endif - -- for (i = 0; i < df->num_problems_defined; i++) -+ /* We iterate over problems by index as each problem removed will -+ lead to problems_in_order to be reordered. 
*/ -+ for (i = 0; i < DF_LAST_PROBLEM_PLUS1; i++) - { -- struct dataflow *dflow = df->problems_in_order[i]; -- struct df_problem *problem = dflow->problem; -+ struct dataflow *dflow = df->problems_by_index[i]; - -- if (dflow->optional_p) -- { -- gcc_assert (problem->remove_problem_fun); -- (problem->remove_problem_fun) (); -- df->problems_in_order[i] = NULL; -- df->problems_by_index[problem->id] = NULL; -- removed++; -- } -+ if (dflow && dflow->optional_p) -+ df_remove_problem (dflow); - } -- df->num_problems_defined -= removed; - - /* Clear all of the flags. */ - df->changeable_flags = 0; ---- a/src/gcc/fortran/Make-lang.in -+++ b/src/gcc/fortran/Make-lang.in -@@ -167,7 +167,7 @@ check-f95-subtargets : check-gfortran-subtargets - check-fortran-subtargets : check-gfortran-subtargets - lang_checks += check-gfortran - lang_checks_parallelized += check-gfortran --# For description see comment above check_gcc_parallelize in gcc/Makefile.in. -+# For description see the check_$lang_parallelize comment in gcc/Makefile.in. - check_gfortran_parallelize = 10000 - - # GFORTRAN documentation. -@@ -275,7 +275,7 @@ fortran.uninstall: - # We just have to delete files specific to us. - - fortran.mostlyclean: -- -rm -f f951$(exeext) -+ -rm -f gfortran$(exeext) gfortran-cross$(exeext) f951$(exeext) - -rm -f fortran/*.o - - fortran.clean: ---- a/src/gcc/genpreds.c -+++ b/src/gcc/genpreds.c -@@ -640,12 +640,14 @@ struct constraint_data - const char *regclass; /* for register constraints */ - rtx exp; /* for other constraints */ - unsigned int lineno; /* line of definition */ -- unsigned int is_register : 1; -- unsigned int is_const_int : 1; -- unsigned int is_const_dbl : 1; -- unsigned int is_extra : 1; -- unsigned int is_memory : 1; -- unsigned int is_address : 1; -+ unsigned int is_register : 1; -+ unsigned int is_const_int : 1; -+ unsigned int is_const_dbl : 1; -+ unsigned int is_extra : 1; -+ unsigned int is_memory : 1; -+ unsigned int is_address : 1; -+ unsigned int maybe_allows_reg : 1; -+ unsigned int maybe_allows_mem : 1; - }; - - /* Overview of all constraints beginning with a given letter. */ -@@ -691,6 +693,9 @@ static unsigned int satisfied_start; - static unsigned int const_int_start, const_int_end; - static unsigned int memory_start, memory_end; - static unsigned int address_start, address_end; -+static unsigned int maybe_allows_none_start, maybe_allows_none_end; -+static unsigned int maybe_allows_reg_start, maybe_allows_reg_end; -+static unsigned int maybe_allows_mem_start, maybe_allows_mem_end; - - /* Convert NAME, which contains angle brackets and/or underscores, to - a string that can be used as part of a C identifier. The string -@@ -711,6 +716,34 @@ mangle (const char *name) - return XOBFINISH (rtl_obstack, const char *); - } - -+/* Return a bitmask, bit 1 if EXP maybe allows a REG/SUBREG, 2 if EXP -+ maybe allows a MEM. Bits should be clear only when we are sure it -+ will not allow a REG/SUBREG or a MEM. */ -+static int -+compute_maybe_allows (rtx exp) ++ ++ /* Check 64 bits saturation. 
*/ ++ VDUP(vector, , int, s, 64, 1, -10); ++ VDUP(vector_shift, , int, s, 64, 1, 64); ++ VDUP(vector, q, int, s, 64, 2, 10); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ ++#undef CMT ++#define CMT " (check saturation on 64 bits)" ++ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_64, CMT); ++ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_64, CMT); ++ ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, CMT); ++} ++ ++int main (void) +{ -+ switch (GET_CODE (exp)) -+ { -+ case IF_THEN_ELSE: -+ /* Conservative answer is like IOR, of the THEN and ELSE branches. */ -+ return compute_maybe_allows (XEXP (exp, 1)) -+ | compute_maybe_allows (XEXP (exp, 2)); -+ case AND: -+ return compute_maybe_allows (XEXP (exp, 0)) -+ & compute_maybe_allows (XEXP (exp, 1)); -+ case IOR: -+ return compute_maybe_allows (XEXP (exp, 0)) -+ | compute_maybe_allows (XEXP (exp, 1)); -+ case MATCH_CODE: -+ if (*XSTR (exp, 1) == '\0') -+ return (strstr (XSTR (exp, 0), "reg") != NULL ? 1 : 0) -+ | (strstr (XSTR (exp, 0), "mem") != NULL ? 2 : 0); -+ /* FALLTHRU */ -+ default: -+ return 3; -+ } ++ exec_vqshl (); ++ return 0; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl_n.c +@@ -0,0 +1,234 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc, ++ 0xe0, 0xe4, 0xe8, 0xec, ++ 0xf0, 0xf4, 0xf8, 0xfc }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, ++ 0xffe8, 0xffea, 0xffec, 0xffee }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, ++ 0xffffffe4, 0xffffffe6 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; + - /* Add one constraint, of any sort, to the tables. NAME is its name; - REGCLASS is the register class, if any; EXP is the expression to - test, if any; IS_MEMORY and IS_ADDRESS indicate memory and address -@@ -866,6 +899,11 @@ add_constraint (const char *name, const char *regclass, - c->is_extra = !(regclass || is_const_int || is_const_dbl); - c->is_memory = is_memory; - c->is_address = is_address; -+ int maybe_allows = 3; -+ if (exp) -+ maybe_allows = compute_maybe_allows (exp); -+ c->maybe_allows_reg = (maybe_allows & 1) != 0; -+ c->maybe_allows_mem = (maybe_allows & 2) != 0; - - c->next_this_letter = *slot; - *slot = c; -@@ -940,8 +978,30 @@ choose_enum_order (void) - enum_order[next++] = c; - address_end = next; - -+ maybe_allows_none_start = next; -+ FOR_ALL_CONSTRAINTS (c) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && !c->maybe_allows_reg && !c->maybe_allows_mem) -+ enum_order[next++] = c; -+ maybe_allows_none_end = next; ++/* Expected values of cumulative_saturation flag with max positive input. 
*/ ++int VECT_VAR(expected_cumulative_sat_max,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max,uint,64,2) = 1; + -+ maybe_allows_reg_start = next; -+ FOR_ALL_CONSTRAINTS (c) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && c->maybe_allows_reg && !c->maybe_allows_mem) -+ enum_order[next++] = c; -+ maybe_allows_reg_end = next; ++/* Expected results with max positive input. */ ++VECT_VAR_DECL(expected_max,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max,int,64,1) [] = { 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_max,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max,int,32,4) [] = { 0x7fffffff, 0x7fffffff, ++ 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max,int,64,2) [] = { 0x7fffffffffffffff, ++ 0x7fffffffffffffff }; ++VECT_VAR_DECL(expected_max,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; + -+ maybe_allows_mem_start = next; -+ FOR_ALL_CONSTRAINTS (c) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && !c->maybe_allows_reg && c->maybe_allows_mem) -+ enum_order[next++] = c; -+ maybe_allows_mem_end = next; ++#define INSN vqshl ++#define TEST_MSG "VQSHL_N/VQSHLQ_N" + - FOR_ALL_CONSTRAINTS (c) -- if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address) -+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address -+ && c->maybe_allows_reg && c->maybe_allows_mem) - enum_order[next++] = c; - gcc_assert (next == num_constraints); - } -@@ -1229,6 +1289,41 @@ write_range_function (const char *name, unsigned int start, unsigned int end) - "}\n\n", name); - } - -+/* Write a 
definition for insn_extra_constraint_allows_reg_mem function. */ -+static void -+write_allows_reg_mem_function (void) ++#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) ++#define FNNAME(NAME) FNNAME1(NAME) ++ ++FNNAME (INSN) +{ -+ printf ("static inline void\n" -+ "insn_extra_constraint_allows_reg_mem (enum constraint_num c,\n" -+ "\t\t\t\t bool *allows_reg, bool *allows_mem)\n" -+ "{\n"); -+ if (maybe_allows_none_start != maybe_allows_none_end) -+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" -+ " return;\n", -+ enum_order[maybe_allows_none_start]->c_name, -+ enum_order[maybe_allows_none_end - 1]->c_name); -+ if (maybe_allows_reg_start != maybe_allows_reg_end) -+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" -+ " {\n" -+ " *allows_reg = true;\n" -+ " return;\n" -+ " }\n", -+ enum_order[maybe_allows_reg_start]->c_name, -+ enum_order[maybe_allows_reg_end - 1]->c_name); -+ if (maybe_allows_mem_start != maybe_allows_mem_end) -+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n" -+ " {\n" -+ " *allows_mem = true;\n" -+ " return;\n" -+ " }\n", -+ enum_order[maybe_allows_mem_start]->c_name, -+ enum_order[maybe_allows_mem_end - 1]->c_name); -+ printf (" (void) c;\n" -+ " *allows_reg = true;\n" -+ " *allows_mem = true;\n" -+ "}\n\n"); -+} ++ /* Basic test: v2=vqshl_n(v1,v), then store the result. */ ++#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++#define TEST_VQSHL_N(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ clean_results (); ++ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose shift amount arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat, CMT); ++ ++ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat, CMT); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++#undef CMT ++#define CMT " (with max input)" ++ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat_max, CMT); ++ ++ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat_max, CMT); ++ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat_max, CMT); + - /* VEC is a list of key/value pairs, with the keys being lower bounds - of a range. Output a decision tree that handles the keys covered by - [VEC[START], VEC[END]), returning FALLBACK for keys lower then VEC[START]'s. -@@ -1326,6 +1421,7 @@ write_tm_preds_h (void) - memory_start, memory_end); - write_range_function ("insn_extra_address_constraint", - address_start, address_end); -+ write_allows_reg_mem_function (); - - if (constraint_max_namelen > 1) - { ---- a/src/gcc/go/Make-lang.in -+++ b/src/gcc/go/Make-lang.in -@@ -197,6 +197,7 @@ go.uninstall: - go.mostlyclean: - -rm -f go/*$(objext) - -rm -f go/*$(coverageexts) -+ -rm -f gccgo$(exeext) gccgo-cross$(exeext) go1$(exeext) - go.clean: - go.distclean: - go.maintainer-clean: ---- a/src/gcc/ira-costs.c -+++ b/src/gcc/ira-costs.c -@@ -1380,8 +1380,6 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref) - rtx dest = SET_DEST (set); - rtx src = SET_SRC (set); - -- dest = SET_DEST (set); -- src = SET_SRC (set); - if (GET_CODE (dest) == SUBREG - && (GET_MODE_SIZE (GET_MODE (dest)) - == GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))) ---- a/src/gcc/jit/Make-lang.in -+++ b/src/gcc/jit/Make-lang.in -@@ -285,6 +285,10 @@ jit.uninstall: - # We just have to delete files specific to us. 
- - jit.mostlyclean: -+ -rm -f $(LIBGCCJIT_FILENAME) $(LIBGCCJIT_SYMLINK) -+ -rm -f $(LIBGCCJIT_LINKER_NAME_SYMLINK) $(FULL_DRIVER_NAME) -+ -rm -f $(LIBGCCJIT_SONAME) -+ -rm -f $(jit_OBJS) - - jit.clean: - ---- a/src/gcc/loop-invariant.c -+++ b/src/gcc/loop-invariant.c -@@ -740,8 +740,11 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on, - enough to not regress 410.bwaves either (by still moving reg+reg - invariants). - See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */ -- inv->cheap_address = address_cost (SET_SRC (set), word_mode, -- ADDR_SPACE_GENERIC, speed) < 3; -+ if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set)))) -+ inv->cheap_address = address_cost (SET_SRC (set), word_mode, -+ ADDR_SPACE_GENERIC, speed) < 3; -+ else -+ inv->cheap_address = false; - } - else - { -@@ -1174,6 +1177,7 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed, - } - - if (!inv->cheap_address -+ || inv->def->n_uses == 0 - || inv->def->n_addr_uses < inv->def->n_uses) - (*comp_cost) += inv->cost * inv->eqno; - -@@ -1512,6 +1516,79 @@ replace_uses (struct invariant *inv, rtx reg, bool in_group) - return 1; - } - -+/* Whether invariant INV setting REG can be moved out of LOOP, at the end of -+ the block preceding its header. */ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max, CMT); ++} + -+static bool -+can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg) ++int main (void) +{ -+ df_ref def, use; -+ unsigned int dest_regno, defs_in_loop_count = 0; -+ rtx_insn *insn = inv->insn; -+ basic_block bb = BLOCK_FOR_INSN (inv->insn); ++ exec_vqshl_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshlu_n.c +@@ -0,0 +1,263 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* We ignore hard register and memory access for cost and complexity reasons. -+ Hard register are few at this stage and expensive to consider as they -+ require building a separate data flow. Memory access would require using -+ df_simulate_* and can_move_insns_across functions and is more complex. */ -+ if (!REG_P (reg) || HARD_REGISTER_P (reg)) -+ return false; ++/* Expected values of cumulative_saturation flag with negative ++ input. 
*/ ++int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; + -+ /* Check whether the set is always executed. We could omit this condition if -+ we know that the register is unused outside of the loop, but it does not -+ seem worth finding out. */ -+ if (!inv->always_executed) -+ return false; ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x0, 0x0 }; + -+ /* Check that all uses that would be dominated by def are already dominated -+ by it. */ -+ dest_regno = REGNO (reg); -+ for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use)) -+ { -+ rtx_insn *use_insn; -+ basic_block use_bb; ++/* Expected values of cumulative_saturation flag with shift by 1. */ ++int VECT_VAR(expected_cumulative_sat_sh1,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_sh1,int,64,2) = 0; + -+ use_insn = DF_REF_INSN (use); -+ use_bb = BLOCK_FOR_INSN (use_insn); ++/* Expected results with shift by 1. */ ++VECT_VAR_DECL(expected_sh1,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected_sh1,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected_sh1,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_sh1,uint,64,1) [] = { 0xfffffffffffffffe }; ++VECT_VAR_DECL(expected_sh1,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe, ++ 0xfe, 0xfe, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected_sh1,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe, ++ 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected_sh1,uint,32,4) [] = { 0xfffffffe, 0xfffffffe, ++ 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_sh1,uint,64,2) [] = { 0xfffffffffffffffe, ++ 0xfffffffffffffffe }; + -+ /* Ignore instruction considered for moving. */ -+ if (use_insn == insn) -+ continue; ++/* Expected values of cumulative_saturation flag with shift by 2. 
*/ ++int VECT_VAR(expected_cumulative_sat_sh2,int,8,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,16,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,32,2) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,64,1) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,8,16) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_sh2,int,64,2) = 1; + -+ /* Don't consider uses outside loop. */ -+ if (!flow_bb_inside_loop_p (loop, use_bb)) -+ continue; ++/* Expected results with shift by 2. */ ++VECT_VAR_DECL(expected_sh2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_sh2,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_sh2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_sh2,uint,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_sh2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_sh2,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_sh2,uint,32,4) [] = { 0xffffffff, 0xffffffff, ++ 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_sh2,uint,64,2) [] = { 0xffffffffffffffff, ++ 0xffffffffffffffff }; + -+ /* Don't move if a use is not dominated by def in insn. */ -+ if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn)) -+ return false; -+ if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb)) -+ return false; -+ } ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; ++int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + -+ /* Check for other defs. Any other def in the loop might reach a use -+ currently reached by the def in insn. */ -+ for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def)) -+ { -+ basic_block def_bb = DF_REF_BB (def); ++/* Expected results. */ ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0, ++ 0xa0, 0xa0, 0xa0, 0xa0, ++ 0xa0, 0xa0, 0xa0, 0xa0, ++ 0xa0, 0xa0, 0xa0, 0xa0 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180, ++ 0x180, 0x180, 0x180, 0x180 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 }; + -+ /* Defs in exit block cannot reach a use they weren't already. 
*/ -+ if (single_succ_p (def_bb)) -+ { -+ basic_block def_bb_succ; + -+ def_bb_succ = single_succ (def_bb); -+ if (!flow_bb_inside_loop_p (loop, def_bb_succ)) -+ continue; -+ } ++#define INSN vqshlu ++#define TEST_MSG "VQSHLU_N/VQSHLUQ_N" + -+ if (++defs_in_loop_count > 1) -+ return false; -+ } ++#define FNNAME1(NAME) void exec_ ## NAME ## _n(void) ++#define FNNAME(NAME) FNNAME1(NAME) + -+ return true; -+} ++FNNAME (INSN) ++{ ++ /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ ++#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \ ++ VECT_VAR(vector_res, T3, W, N) = \ ++ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ ++ VECT_VAR(vector_res, T3, W, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + - /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false - otherwise. */ - -@@ -1545,11 +1622,8 @@ move_invariant_reg (struct loop *loop, unsigned invno) - } - } - -- /* Move the set out of the loop. If the set is always executed (we could -- omit this condition if we know that the register is unused outside of -- the loop, but it does not seem worth finding out) and it has no uses -- that would not be dominated by it, we may just move it (TODO). -- Otherwise we need to create a temporary register. */ -+ /* If possible, just move the set out of the loop. Otherwise, we -+ need to create a temporary register. */ - set = single_set (inv->insn); - reg = dest = SET_DEST (set); - if (GET_CODE (reg) == SUBREG) -@@ -1557,19 +1631,25 @@ move_invariant_reg (struct loop *loop, unsigned invno) - if (REG_P (reg)) - regno = REGNO (reg); - -- reg = gen_reg_rtx_and_attrs (dest); -+ if (!can_move_invariant_reg (loop, inv, reg)) -+ { -+ reg = gen_reg_rtx_and_attrs (dest); - -- /* Try replacing the destination by a new pseudoregister. */ -- validate_change (inv->insn, &SET_DEST (set), reg, true); -+ /* Try replacing the destination by a new pseudoregister. */ -+ validate_change (inv->insn, &SET_DEST (set), reg, true); - -- /* As well as all the dominated uses. */ -- replace_uses (inv, reg, true); -+ /* As well as all the dominated uses. */ -+ replace_uses (inv, reg, true); - -- /* And validate all the changes. */ -- if (!apply_change_group ()) -- goto fail; -+ /* And validate all the changes. */ -+ if (!apply_change_group ()) -+ goto fail; - -- emit_insn_after (gen_move_insn (dest, reg), inv->insn); -+ emit_insn_after (gen_move_insn (dest, reg), inv->insn); -+ } -+ else if (dump_file) -+ fprintf (dump_file, "Invariant %d moved without introducing a new " -+ "temporary register\n", invno); - reorder_insns (inv->insn, inv->insn, BB_END (preheader)); - - /* If there is a REG_EQUAL note on the insn we just moved, and the ---- a/src/gcc/lra-constraints.c -+++ b/src/gcc/lra-constraints.c -@@ -1656,8 +1656,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, - { - HARD_REG_SET temp; - -- // ??? 
Is this assert right -- // lra_assert (hard_reg_set_subset_p (set, reg_class_contents[rclass])); -+ lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set)); - COPY_HARD_REG_SET (temp, set); - AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs); - return (hard_reg_set_subset_p ---- a/src/gcc/objc/Make-lang.in -+++ b/src/gcc/objc/Make-lang.in -@@ -114,6 +114,7 @@ objc.uninstall: - objc.mostlyclean: - -rm -f objc/*$(objext) objc/xforward objc/fflags - -rm -f objc/*$(coverageexts) -+ -rm -f cc1obj$(exeext) - objc.clean: objc.mostlyclean - -rm -rf objc-headers - objc.distclean: ---- a/src/gcc/objcp/Make-lang.in -+++ b/src/gcc/objcp/Make-lang.in -@@ -142,6 +142,7 @@ obj-c++.uninstall: - obj-c++.mostlyclean: - -rm -f objcp/*$(objext) - -rm -f objcp/*$(coverageexts) -+ -rm -f cc1objplus$(exeext) - obj-c++.clean: obj-c++.mostlyclean - obj-c++.distclean: - -rm -f objcp/config.status objcp/Makefile ---- a/src/gcc/optabs.c -+++ b/src/gcc/optabs.c -@@ -6544,18 +6544,28 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1, - { - struct expand_operand ops[2]; - rtx rtx_op0, rtx_op1; -+ machine_mode m0, m1; - enum rtx_code rcode = get_rtx_code (tcode, unsignedp); - - gcc_assert (TREE_CODE_CLASS (tcode) == tcc_comparison); - -- /* Expand operands. */ -+ /* Expand operands. For vector types with scalar modes, e.g. where int64x1_t -+ has mode DImode, this can produce a constant RTX of mode VOIDmode; in such -+ cases, use the original mode. */ - rtx_op0 = expand_expr (t_op0, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op0)), - EXPAND_STACK_PARM); -+ m0 = GET_MODE (rtx_op0); -+ if (m0 == VOIDmode) -+ m0 = TYPE_MODE (TREE_TYPE (t_op0)); ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + - rtx_op1 = expand_expr (t_op1, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op1)), - EXPAND_STACK_PARM); -+ m1 = GET_MODE (rtx_op1); -+ if (m1 == VOIDmode) -+ m1 = TYPE_MODE (TREE_TYPE (t_op1)); - -- create_input_operand (&ops[0], rtx_op0, GET_MODE (rtx_op0)); -- create_input_operand (&ops[1], rtx_op1, GET_MODE (rtx_op1)); -+ create_input_operand (&ops[0], rtx_op0, m0); -+ create_input_operand (&ops[1], rtx_op1, m1); - if (!maybe_legitimize_operands (icode, 4, 2, ops)) - gcc_unreachable (); - return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value); ---- a/src/gcc/params.def -+++ b/src/gcc/params.def -@@ -262,6 +262,14 @@ DEFPARAM(PARAM_MAX_HOIST_DEPTH, - "Maximum depth of search in the dominator tree for expressions to hoist", - 30, 0, 0) - ++#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+/* When synthesizing expnonentiation by a real constant operations using square -+ roots, this controls how deep sqrt chains we are willing to generate. */ -+DEFPARAM(PARAM_MAX_POW_SQRT_DEPTH, -+ "max-pow-sqrt-depth", -+ "Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant", -+ 5, 1, 32) + - /* This parameter limits the number of insns in a loop that will be unrolled, - and by how much the loop is unrolled. 
- ---- a/src/gcc/rtlanal.c -+++ b/src/gcc/rtlanal.c -@@ -104,7 +104,10 @@ generic_subrtx_iterator ::add_single_to_queue (array_type &array, - return base; - } - gcc_checking_assert (i == LOCAL_ELEMS); -- vec_safe_grow (array.heap, i + 1); -+ /* A previous iteration might also have moved from the stack to the -+ heap, in which case the heap array will already be big enough. */ -+ if (vec_safe_length (array.heap) <= i) -+ vec_safe_grow (array.heap, i + 1); - base = array.heap->address (); - memcpy (base, array.stack, sizeof (array.stack)); - base[LOCAL_ELEMS] = x; ---- a/src/gcc/simplify-rtx.c -+++ b/src/gcc/simplify-rtx.c -@@ -1171,7 +1171,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) - = (float_truncate:SF foo:DF). - - (float_truncate:DF (float_extend:XF foo:SF)) -- = (float_extend:SF foo:DF). */ -+ = (float_extend:DF foo:SF). */ - if ((GET_CODE (op) == FLOAT_TRUNCATE - && flag_unsafe_math_optimizations) - || GET_CODE (op) == FLOAT_EXTEND) -@@ -1183,14 +1183,14 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) - XEXP (op, 0), mode); - - /* (float_truncate (float x)) is (float x) */ -- if (GET_CODE (op) == FLOAT -+ if ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) - && (flag_unsafe_math_optimizations - || (SCALAR_FLOAT_MODE_P (GET_MODE (op)) - && ((unsigned)significand_size (GET_MODE (op)) - >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) - - num_sign_bit_copies (XEXP (op, 0), - GET_MODE (XEXP (op, 0)))))))) -- return simplify_gen_unary (FLOAT, mode, -+ return simplify_gen_unary (GET_CODE (op), mode, - XEXP (op, 0), - GET_MODE (XEXP (op, 0))); - -@@ -1221,7 +1221,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) - rounding can't happen. - */ - if (GET_CODE (op) == FLOAT_EXTEND -- || (GET_CODE (op) == FLOAT -+ || ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT) - && SCALAR_FLOAT_MODE_P (GET_MODE (op)) - && ((unsigned)significand_size (GET_MODE (op)) - >= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))) ---- a/src/gcc/stmt.c -+++ b/src/gcc/stmt.c -@@ -342,13 +342,7 @@ parse_output_constraint (const char **constraint_p, int operand_num, - else if (insn_extra_memory_constraint (cn)) - *allows_mem = true; - else -- { -- /* Otherwise we can't assume anything about the nature of -- the constraint except that it isn't purely registers. -- Treat it like "g" and hope for the best. */ -- *allows_reg = true; -- *allows_mem = true; -- } -+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); - break; - } - -@@ -465,13 +459,7 @@ parse_input_constraint (const char **constraint_p, int input_num, - else if (insn_extra_memory_constraint (cn)) - *allows_mem = true; - else -- { -- /* Otherwise we can't assume anything about the nature of -- the constraint except that it isn't purely registers. -- Treat it like "g" and hope for the best. 
*/ -- *allows_reg = true; -- *allows_mem = true; -- } -+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem); - break; - } - ---- a/src/gcc/target.def -+++ b/src/gcc/target.def -@@ -1975,7 +1975,7 @@ merging.", - DEFHOOKPOD - (attribute_table, - "If defined, this target hook points to an array of @samp{struct\n\ --attribute_spec} (defined in @file{tree.h}) specifying the machine\n\ -+attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\ - specific attributes for this target and some of the restrictions on the\n\ - entities to which these attributes are applied and the arguments they\n\ - take.", ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr65648.c -@@ -0,0 +1,34 @@ -+/* PR target/65648 */ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+int a = 0, *b = 0, c = 0; -+static int d = 0; -+short e = 1; -+static long long f = 0; -+long long *i = &f; -+unsigned char j = 0; ++ clean_results (); + -+__attribute__((noinline, noclone)) void -+foo (int x, int *y) -+{ -+ asm volatile ("" : : "r" (x), "r" (y) : "memory"); -+} ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, , int, s, 8, 8, -1); ++ VDUP(vector, , int, s, 16, 4, -2); ++ VDUP(vector, , int, s, 32, 2, -3); ++ VDUP(vector, , int, s, 64, 1, -4); ++ VDUP(vector, q, int, s, 8, 16, -1); ++ VDUP(vector, q, int, s, 16, 8, -2); ++ VDUP(vector, q, int, s, 32, 4, -3); ++ VDUP(vector, q, int, s, 64, 2, -4); + -+__attribute__((noinline, noclone)) void -+bar (const char *x, long long y) -+{ -+ asm volatile ("" : : "r" (x), "r" (&y) : "memory"); -+ if (y != 0) -+ __builtin_abort (); -+} ++ /* Choose shift amount arbitrarily. */ ++#define CMT " (negative input)" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_neg, CMT); + -+int -+main () -+{ -+ int k = 0; -+ b = &k; -+ j = (!a) - (c <= e); -+ *i = j; -+ foo (a, &k); -+ bar ("", f); -+ return 0; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/loop-8.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); + -+void -+f (int *a, int *b) -+{ -+ int i; ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); + -+ for (i = 0; i < 100; i++) -+ { -+ int d = 42; ++ /* shift by 1. */ ++#undef CMT ++#define CMT " (shift by 1)" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_sh1, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1, expected_cumulative_sat_sh1, CMT); + -+ a[i] = d; -+ if (i % 2) -+ d = i; -+ b[i] = d; -+ } -+} ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh1, CMT); + -+/* Load of 42 is moved out of the loop, introducing a new pseudo register. */ -+/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */ -+/* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */ -+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ /* shift by 2 to force saturation. */ ++#undef CMT ++#define CMT " (shift by 2)" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2, expected_cumulative_sat_sh2, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_sh2, CMT); + ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/loop-9.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh2, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh2, CMT); + -+void -+f (double *a) -+{ -+ int i; -+ for (i = 0; i < 100; i++) -+ a[i] = 18.4242; -+} ++ ++ /* Fill input vector with positive values, to check normal case. 
*/ ++ VDUP(vector, , int, s, 8, 8, 1); ++ VDUP(vector, , int, s, 16, 4, 2); ++ VDUP(vector, , int, s, 32, 2, 3); ++ VDUP(vector, , int, s, 64, 1, 4); ++ VDUP(vector, q, int, s, 8, 16, 5); ++ VDUP(vector, q, int, s, 16, 8, 6); ++ VDUP(vector, q, int, s, 32, 4, 7); ++ VDUP(vector, q, int, s, 64, 2, 8); + -+/* Load of x is moved out of the loop. */ -+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */ -+/* { dg-final { scan-rtl-dump "without introducing a new temporary register" "loop2_invariant" } } */ -+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ++ /* Arbitrary shift amount. */ ++#undef CMT ++#define CMT "" ++ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7, expected_cumulative_sat, CMT); ++ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8, expected_cumulative_sat, CMT); + ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/loop-invariant.c -@@ -0,0 +1,43 @@ -+/* { dg-do compile { target x86_64-*-* } } */ -+/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */ -+/* NOTE: The target list above could be extended to other targets that have -+ conditional moves, but don't have zero registers. */ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + -+enum test_type ++int main (void) +{ -+ TYPE0, -+ TYPE1 -+}; ++ exec_vqshlu_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_n.c +@@ -0,0 +1,177 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+struct type_node -+{ -+ enum test_type type; -+}; ++/* Expected values of cumulative_saturation flag. */ ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + -+struct test_ref -+{ -+ struct type_node *referring; -+}; ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+struct test_node -+{ -+ struct test_node *next; -+}; ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by 3. 
*/ ++int VECT_VAR(expected_cumulative_sat_max_sh3,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,int,64,2) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,uint,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,uint,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh3,uint,64,2) = 1; + -+int iterate (struct test_node *, unsigned, struct test_ref **); ++/* Expected results with max input value shifted by 3. */ ++VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+int -+loop_invar (struct test_node *node) -+{ -+ struct test_ref *ref; ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by type size. */ ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,uint,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,uint,32,4) = 0; ++int VECT_VAR(expected_cumulative_sat_max_shmax,uint,64,2) = 0; + -+ for (unsigned i = 0; iterate (node, i, &ref); i++) -+ if (loop_invar ((ref->referring && ref->referring->type == TYPE0) -+ ? ((struct test_node *) (ref->referring)) : 0)) -+ return 1; ++/* Expected results with max input value shifted by type size. */ ++VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, ++ 0x7f, 0x7f, 0x7f, 0x7f }; ++VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x7fff, 0x7fff, ++ 0x7fff, 0x7fff }; ++VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; ++VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+ return 0; -+} ++#define INSN vqshrn_n ++#define TEST_MSG "VQSHRN_N" + -+/* { dg-final { scan-rtl-dump "Decided to move invariant" "loop2_invariant" } } */ -+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-1.c -@@ -0,0 +1,6 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) + -+#define EXPN (-6 * (0.5*0.5*0.5*0.5)) ++FNNAME (INSN) ++{ ++ /* Basic test: y=vqshrn_n(x,v), then store the result. 
*/ ++#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ ++ VECT_VAR(vector_res, T1, W2, N)); \ ++ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + -+#include "pow-sqrt.x" ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-2.c -@@ -0,0 +1,5 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */ ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+#define EXPN (-5.875) -+#include "pow-sqrt.x" ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-3.c -@@ -0,0 +1,5 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=3" } */ ++#define TEST_VQSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+#define EXPN (1.25) -+#include "pow-sqrt.x" ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt.x -@@ -0,0 +1,30 @@ + -+extern void abort (void); ++ /* vector is twice as large as vector_res. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); + ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); + -+__attribute__((noinline)) double -+real_pow (double x, double pow_exp) -+{ -+ return __builtin_pow (x, pow_exp); -+} ++ clean_results (); + -+#define EPS (0.000000000000000000001) ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); + -+#define SYNTH_POW(X, Y) __builtin_pow (X, Y) -+volatile double arg; ++ /* Choose shift amount arbitrarily. 
*/ ++#define CMT "" ++ TEST_VQSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); ++ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); + -+int -+main (void) -+{ -+ double i_arg = 0.1; ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); + -+ for (arg = i_arg; arg < 100.0; arg += 1.0) -+ { -+ double synth_res = SYNTH_POW (arg, EXPN); -+ double real_res = real_pow (arg, EXPN); + -+ if (__builtin_abs (SYNTH_POW (arg, EXPN) - real_pow (arg, EXPN)) > EPS) -+ abort (); -+ } -+ return 0; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/torture/pr66076.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "" } */ -+/* { dg-options "-mno-prefer-avx128 -march=bdver4" { target i?86-*-* x86_64-*-* } } */ ++ /* Use max possible value as input. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + -+void -+f0a (char *result, char *arg1, char *arg4, char temp_6) -+{ -+ int idx = 0; -+ for (idx = 0; idx < 416; idx += 1) -+ result[idx] = (arg1[idx] + arg4[idx]) * temp_6; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c -@@ -0,0 +1,54 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ ++#undef CMT ++#define CMT " (check saturation: shift by 3)" ++ TEST_VQSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); ++ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); + -+void foo (double *p) -+{ -+ int i; -+ for (i = -20000; i < 200000; i+= 40) -+ { -+ p[i+0] = 1.0; -+ p[i+1] = 1.0; -+ p[i+2] = 1.0; -+ p[i+3] = 1.0; -+ p[i+4] = 1.0; -+ p[i+5] = 1.0; -+ p[i+6] = 1.0; -+ p[i+7] = 1.0; -+ p[i+8] = 1.0; -+ p[i+9] = 1.0; -+ p[i+10] = 1.0; -+ p[i+11] = 1.0; -+ p[i+12] = 1.0; -+ p[i+13] = 1.0; -+ p[i+14] = 1.0; -+ p[i+15] = 1.0; -+ p[i+16] = 1.0; -+ p[i+17] = 1.0; -+ p[i+18] = 1.0; -+ p[i+19] = 1.0; -+ p[i+20] = 1.0; -+ p[i+21] = 1.0; -+ p[i+22] = 1.0; -+ p[i+23] = 1.0; -+ p[i+24] = 1.0; -+ p[i+25] = 1.0; -+ p[i+26] = 1.0; -+ p[i+27] = 1.0; -+ p[i+28] = 1.0; -+ p[i+29] = 1.0; -+ p[i+30] = 1.0; -+ p[i+31] = 1.0; -+ p[i+32] = 1.0; -+ p[i+33] = 1.0; -+ p[i+34] = 1.0; -+ p[i+35] = 1.0; -+ p[i+36] = 1.0; -+ p[i+37] = 1.0; -+ p[i+38] = 1.0; -+ p[i+39] = 1.0; -+ } -+} ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, 
expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT); + -+/* We should groups address type IV uses. */ -+/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */ -+/* { dg-final { cleanup-tree-dump "ivopts" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c -@@ -12,6 +12,8 @@ - - vf2_t vf2 = (vf2_t){ 17.f, 18.f }; - vi4_t vi4 = (vi4_t){ 0xdeadbabe, 0xbabecafe, 0xcafebeef, 0xbeefdead }; -+vlf1_t vlf1 = (vlf1_t) { 17.0 }; + - union int128_t qword; - - int *int_ptr = (int *)0xabcdef0123456789ULL; -@@ -41,4 +43,5 @@ FUNC_VAL_CHECK (11, long double, 98765432123456789.987654321L, Q0, flat) - FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64) - FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128) - FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat) -+FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat) - #endif ---- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h -+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h -@@ -10,6 +10,9 @@ typedef float vf4_t __attribute__((vector_size (16))); - /* 128-bit vector of 4 ints. */ - typedef int vi4_t __attribute__((vector_size (16))); - -+/* 128-bit vector of 1 quad precision float. */ -+typedef long double vlf1_t __attribute__((vector_size (16))); ++#undef CMT ++#define CMT " (check saturation: shift by max)" ++ TEST_VQSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); ++ TEST_VQSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); + - /* signed quad-word (in an union for the convenience of initialization). */ - union int128_t - { ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp -@@ -27,14 +27,26 @@ load_lib gcc-dg.exp - - # Initialize `dg'. - load_lib c-torture.exp --load_lib target-supports.exp --load_lib torture-options.exp - - dg-init - --if {[istarget arm*-*-*] -- && ![check_effective_target_arm_neon_ok]} then { -- return -+# The default action for a test is 'run'. Save current default. -+global dg-do-what-default -+set save-dg-do-what-default ${dg-do-what-default} ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); ++} + -+# For ARM, make sure that we have a target compatible with NEON, and do -+# not attempt to run execution tests if the hardware doesn't support it. 
-+if {[istarget arm*-*-*]} then { -+ if {![check_effective_target_arm_neon_ok]} then { -+ return -+ } -+ if {![is-effective-target arm_neon_hw]} then { -+ set dg-do-what-default compile -+ } else { -+ set dg-do-what-default run -+ } -+} else { -+ set dg-do-what-default run - } - - torture-init -@@ -44,22 +56,10 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS - set additional_flags [add_options_for_arm_neon ""] - - # Main loop. --foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] { -- # If we're only testing specific files and this isn't one of them, skip it. -- if ![runtest_file_p $runtests $src] then { -- continue -- } -- -- # runtest_file_p is already run above, and the code below can run -- # runtest_file_p again, make sure everything for this test is -- # performed if the above runtest_file_p decided this runtest -- # instance should execute the test -- gcc_parallel_test_enable 0 -- c-torture-execute $src $additional_flags -- gcc-dg-runtest $src "" $additional_flags -- gcc_parallel_test_enable 1 --} -+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ -+ "" ${additional_flags} - - # All done. -+set dg-do-what-default ${save-dg-do-what-default} - torture-finish - dg-finish ++int main (void) ++{ ++ exec_vqshrn_n (); ++ return 0; ++} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovn.c -@@ -0,0 +1,134 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_n.c +@@ -0,0 +1,133 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + ++/* Expected values of cumulative_saturation flag with negative input. */ ++int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; ++ ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ ++/* Expected values of cumulative_saturation flag with max input value ++ shifted by 1. */ ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++ ++/* Expected results with max input value shifted by 1. */ ++VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++ +/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; ++int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; ++int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12, -+ 0x12, 0x12, 0x12, 0x12 }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0x12345678, 0x12345678 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82, -+ 0x82, 0x82, 0x82, 0x82 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 }; -+ -+/* Expected values of cumulative_saturation flag when saturation occurs. */ -+int VECT_VAR(expected_cumulative_sat1,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat1,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat1,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat1,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat1,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat1,uint,32,2) = 1; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48, ++ 0x48, 0x48, 0x48, 0x48 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe }; + -+/* Expected results when saturation occurs. */ -+VECT_VAR_DECL(expected1,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected1,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + -+#define INSN_NAME vqmovn -+#define TEST_MSG "VQMOVN" ++#define INSN vqshrun_n ++#define TEST_MSG "VQSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + -+FNNAME (INSN_NAME) ++FNNAME (INSN) +{ -+ /* Basic test: y=OP(x), then store the result. */ -+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ -+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ ++ /* Basic test: y=vqshrun_n(x,v), then store the result. */ ++#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ ++ VECT_VAR(vector_res, uint, W2, N) = \ ++ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ ++ VECT_VAR(vector_res, uint, W2, N)); \ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + -+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Two auxliary macros are necessary to expand INSN */ ++#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + -+ /* No need for 64 bits variants. */ ++#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ ++ TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ ++ ++ /* vector is twice as large as vector_res. 
*/ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); -+ DECL_VARIABLE(vector, uint, 16, 8); -+ DECL_VARIABLE(vector, uint, 32, 4); -+ DECL_VARIABLE(vector, uint, 64, 2); + -+ DECL_VARIABLE(vector_res, int, 8, 8); -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + -+ /* Fill input vector with arbitrary values. */ -+ VDUP(vector, q, int, s, 16, 8, 0x12); -+ VDUP(vector, q, int, s, 32, 4, 0x1278); -+ VDUP(vector, q, int, s, 64, 2, 0x12345678); -+ VDUP(vector, q, uint, u, 16, 8, 0x82); -+ VDUP(vector, q, uint, u, 32, 4, 0x8765); -+ VDUP(vector, q, uint, u, 64, 2, 0x87654321); ++ /* Fill input vector with negative values, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, -2); ++ VDUP(vector, q, int, s, 32, 4, -3); ++ VDUP(vector, q, int, s, 64, 2, -4); + -+ /* Apply a unary operator named INSN_NAME. */ ++ /* Choose shift amount arbitrarily. */ ++#define CMT " (negative input)" ++ TEST_VQSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); ++ TEST_VQSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ ++ ++ /* Fill input vector with max value, to check saturation on ++ limits. */ ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ ++#undef CMT ++#define CMT " (check cumulative saturation)" ++ TEST_VQSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT); ++ TEST_VQSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT); ++ TEST_VQSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT); ++ ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++ ++ ++ /* Fill input vector with positive values, to check normal case. 
*/ ++ VDUP(vector, q, int, s, 16, 8, 0x1234); ++ VDUP(vector, q, int, s, 32, 4, 0x87654321); ++ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); ++ ++#undef CMT +#define CMT "" -+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ TEST_VQSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT); ++ TEST_VQSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT); ++ TEST_VQSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); + CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); + CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++} ++ ++int main (void) ++{ ++ exec_vqshrun_n (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c +@@ -25,10 +25,6 @@ VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff8a, 0xff8b, + 0xff8c, 0xff8d }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff79, 0xffffff7a }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xdf, 0xe0, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 0xea, +@@ -49,14 +45,6 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffff79, 0xffffff7a, + 0xffffff7b, 0xffffff7c }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffff68, + 0xffffffffffffff69 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected values of cumulative saturation flag. */ + int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c +@@ -0,0 +1,154 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++#include + ++/* Expected results with positive input. */ ++VECT_VAR_DECL(expected_positive,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected_positive,uint,32,4) [] = { 0xbf000000, 0xbf000000, ++ 0xbf000000, 0xbf000000 }; ++VECT_VAR_DECL(expected_positive,hfloat,32,2) [] = { 0x3f068000, 0x3f068000 }; ++VECT_VAR_DECL(expected_positive,hfloat,32,4) [] = { 0x3c030000, 0x3c030000, ++ 0x3c030000, 0x3c030000 }; + -+ /* Fill input vector with arbitrary values which cause cumulative -+ saturation. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x1234); -+ VDUP(vector, q, int, s, 32, 4, 0x12345678); -+ VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL); -+ VDUP(vector, q, uint, u, 16, 8, 0x8234); -+ VDUP(vector, q, uint, u, 32, 4, 0x87654321); -+ VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); ++/* Expected results with negative input. */ ++VECT_VAR_DECL(expected_negative,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_negative,uint,32,4) [] = { 0xee800000, 0xee800000, ++ 0xee800000, 0xee800000 }; ++VECT_VAR_DECL(expected_negative,hfloat,32,2) [] = { 0xbdcc8000, 0xbdcc8000 }; ++VECT_VAR_DECL(expected_negative,hfloat,32,4) [] = { 0xbc030000, 0xbc030000, ++ 0xbc030000, 0xbc030000 }; ++ ++/* Expected results with FP special values (NaN, infinity). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results with FP special values (zero, large value). */ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7f800000, 0x7f800000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results with FP special values (-0, -infinity). */ ++VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++ ++/* Expected results with FP special large negative value. */ ++VECT_VAR_DECL(expected_fp4,hfloat,32,2) [] = { 0x80000000, 0x80000000 }; ++ ++#define TEST_MSG "VRECPE/VRECPEQ" ++void exec_vrecpe(void) ++{ ++ int i; ++ ++ /* Basic test: y=vrecpe(x), then store the result. */ ++#define TEST_VRECPE(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrecpe##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) ++ ++ /* No need for 64 bits variants. */ ++ DECL_VARIABLE(vector, uint, 32, 2); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 4); ++ ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 32, 4); ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 4); ++ ++ clean_results (); ++ ++ /* Choose init value arbitrarily, positive. */ ++ VDUP(vector, , uint, u, 32, 2, 0x12345678); ++ VDUP(vector, , float, f, 32, 2, 1.9f); ++ VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); ++ VDUP(vector, q, float, f, 32, 4, 125.0f); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, uint, u, 32, 2); ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, uint, u, 32, 4); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#define CMT " (positive input)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_positive, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_positive, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_positive, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_positive, CMT); ++ ++ /* Choose init value arbitrarily,negative. */ ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , float, f, 32, 2, -10.0f); ++ VDUP(vector, q, uint, u, 32, 4, 0x89081234); ++ VDUP(vector, q, float, f, 32, 4, -125.0f); ++ ++ /* Apply the operator. 
*/ ++ TEST_VRECPE(, uint, u, 32, 2); ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, uint, u, 32, 4); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " (negative input)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_negative, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_negative, CMT); ++ ++ /* Test FP variants with special input values (NaN, infinity). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (NaN, infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ /* Test FP variants with special input values (zero, large value). */ ++ VDUP(vector, , float, f, 32, 2, 0.0f); ++ VDUP(vector, q, float, f, 32, 4, 8.97229e37f /*9.0e37f*/); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (zero, large value)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); ++ ++ /* Test FP variants with special input values (-0, -infinity). */ ++ VDUP(vector, , float, f, 32, 2, -0.0f); ++ VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ TEST_VRECPE(q, float, f, 32, 4); + -+ /* Apply a unary operator named INSN_NAME. */ +#undef CMT -+#define CMT " (with saturation)" -+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat1, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat1, CMT); ++#define CMT " FP special (-0, -infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected1, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected1, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected1, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected1, CMT); ++ /* Test FP variants with special input values (large negative value). */ ++ VDUP(vector, , float, f, 32, 2, -9.0e37f); ++ ++ /* Apply the operator. */ ++ TEST_VRECPE(, float, f, 32, 2); ++ ++#undef CMT ++#define CMT " FP special (large negative value)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp4, CMT); +} + +int main (void) +{ -+ exec_vqmovn (); ++ exec_vrecpe (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovun.c -@@ -0,0 +1,93 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c +@@ -0,0 +1,117 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" ++#include + -+/* Expected values of cumulative_saturation flag. 
*/ -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; ++/* Expected results with positive input. */ ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2e19eb7, 0xc2e19eb7 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1db851f, 0xc1db851f, ++ 0xc1db851f, 0xc1db851f }; ++ ++/* Expected results with FP special values (NaN). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, ++ 0x7fc00000, 0x7fc00000 }; ++ ++/* Expected results with FP special values (infinity, 0) and normal ++ values. */ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; + -+/* Expected results. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34, -+ 0x34, 0x34, 0x34, 0x34 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 }; ++/* Expected results with FP special values (infinity, 0). */ ++VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; + -+/* Expected values of cumulative_saturation flag with negative input. */ -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 1; ++#define TEST_MSG "VRECPS/VRECPSQ" ++void exec_vrecps(void) ++{ ++ int i; + -+/* Expected results with negative input. */ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++ /* Basic test: y=vrecps(x), then store the result. */ ++#define TEST_VRECPS(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrecps##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) ++ ++ /* No need for integer variants. */ ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 4); + -+#define INSN_NAME vqmovun -+#define TEST_MSG "VQMOVUN" ++ DECL_VARIABLE(vector2, float, 32, 2); ++ DECL_VARIABLE(vector2, float, 32, 4); + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 4); + -+FNNAME (INSN_NAME) -+{ -+ /* Basic test: y=OP(x), then store the result. */ -+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ -+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ clean_results (); + -+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Choose init value arbitrarily. 
*/ ++ VDUP(vector, , float, f, 32, 2, 12.9f); ++ VDUP(vector, q, float, f, 32, 4, 9.2f); + -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); ++ VDUP(vector2, , float, f, 32, 2, 8.9f); ++ VDUP(vector2, q, float, f, 32, 4, 3.2f); + -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++ /* Apply the operator. */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); + -+ clean_results (); ++#define CMT " (positive input)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); + -+ /* Fill input vector with arbitrary values. */ -+ VDUP(vector, q, int, s, 16, 8, 0x34); -+ VDUP(vector, q, int, s, 32, 4, 0x5678); -+ VDUP(vector, q, int, s, 64, 2, 0x12345678); + -+ /* Apply a unary operator named INSN_NAME. */ -+#define CMT "" -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT); ++ /* Test FP variants with special input values (NaN). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector2, q, float, f, 32, 4, NAN); + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ /* Apply the operator. */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); + -+ /* Fill input vector with negative values. */ -+ VDUP(vector, q, int, s, 16, 8, 0x8234); -+ VDUP(vector, q, int, s, 32, 4, 0x87654321); -+ VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); ++#undef CMT ++#define CMT " FP special (NaN)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ ++ /* Test FP variants with special input values (infinity, 0). */ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Restore a normal value. */ ++ ++ /* Apply the operator. */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); + -+ /* Apply a unary operator named INSN_NAME. */ +#undef CMT -+#define CMT " (negative input)" -+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat_neg, CMT); ++#define CMT " FP special (infinity, 0) and normal value" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++ ++ /* Test FP variants with only special input values (infinity, 0). 
*/ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ VDUP(vector2, , float, f, 32, 2, 0.0f); ++ VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator */ ++ TEST_VRECPS(, float, f, 32, 2); ++ TEST_VRECPS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (infinity, 0)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); +} + +int main (void) +{ -+ exec_vqmovun (); ++ exec_vrecps (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh.c -@@ -0,0 +1,161 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret.c +@@ -0,0 +1,741 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+ -+/* Expected results. */ -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+/* Expected values of cumulative_saturation flag when multiplication -+ saturates. */ -+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; -+ -+/* Expected results when multiplication saturates. */ -+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+ -+/* Expected values of cumulative_saturation flag when rounding -+ should not cause saturation. */ -+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; -+ -+/* Expected results when rounding should not cause saturation. */ -+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++/* Expected results for vreinterpret_s8_xx. 
*/ ++VECT_VAR_DECL(expected_s8_1,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_s8_2,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_3,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_4,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_s8_5,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_s8_6,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_7,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_s8_8,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_s8_9,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++ ++/* Expected results for vreinterpret_s16_xx. */ ++VECT_VAR_DECL(expected_s16_1,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_s16_2,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_s16_3,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_s16_4,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_s16_5,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_s16_6,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_s16_7,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_s16_8,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_s16_9,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++ ++/* Expected results for vreinterpret_s32_xx. */ ++VECT_VAR_DECL(expected_s32_1,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_s32_2,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_s32_3,int,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_s32_4,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_s32_5,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_s32_6,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_s32_7,int,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_s32_8,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_s32_9,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++ ++/* Expected results for vreinterpret_s64_xx. */ ++VECT_VAR_DECL(expected_s64_1,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_s64_2,int,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_s64_3,int,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_s64_4,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_s64_5,int,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_s64_6,int,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_s64_7,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected_s64_8,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_s64_9,int,64,1) [] = { 0xfff3fff2fff1fff0 }; ++ ++/* Expected results for vreinterpret_u8_xx. 
*/ ++VECT_VAR_DECL(expected_u8_1,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_u8_2,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_u8_3,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_4,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_5,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_u8_6,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_7,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_u8_8,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_u8_9,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++ ++/* Expected results for vreinterpret_u16_xx. */ ++VECT_VAR_DECL(expected_u16_1,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_u16_2,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_u16_3,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_u16_4,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_u16_5,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_u16_6,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_u16_7,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_u16_8,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_u16_9,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++ ++/* Expected results for vreinterpret_u32_xx. */ ++VECT_VAR_DECL(expected_u32_1,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_u32_2,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_u32_3,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_u32_4,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_u32_5,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_u32_6,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_u32_7,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_u32_8,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_u32_9,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++ ++/* Expected results for vreinterpret_u64_xx. */ ++VECT_VAR_DECL(expected_u64_1,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_u64_2,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_u64_3,uint,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_u64_4,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected_u64_5,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_u64_6,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; ++VECT_VAR_DECL(expected_u64_7,uint,64,1) [] = { 0xfffffff1fffffff0 }; ++VECT_VAR_DECL(expected_u64_8,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; ++VECT_VAR_DECL(expected_u64_9,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; ++ ++/* Expected results for vreinterpret_p8_xx. 
*/ ++VECT_VAR_DECL(expected_p8_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_p8_2,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_p8_3,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_4,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_5,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7 }; ++VECT_VAR_DECL(expected_p8_6,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; ++VECT_VAR_DECL(expected_p8_7,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_8,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_p8_9,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff }; + -+#define INSN vqrdmulh -+#define TEST_MSG "VQRDMULH" ++/* Expected results for vreinterpret_p16_xx. */ ++VECT_VAR_DECL(expected_p16_1,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_p16_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_p16_3,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_p16_4,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_p16_5,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++VECT_VAR_DECL(expected_p16_6,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_p16_7,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; ++VECT_VAR_DECL(expected_p16_8,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_p16_9,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; ++ ++/* Expected results for vreinterpretq_s8_xx. */ ++VECT_VAR_DECL(expected_q_s8_1,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_s8_2,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_3,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_4,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_s8_5,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_s8_6,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_7,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_s8_8,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_s8_9,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++ ++/* Expected results for vreinterpretq_s16_xx. 
*/ ++VECT_VAR_DECL(expected_q_s16_1,int,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_s16_2,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_3,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_4,int,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_s16_5,int,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected_q_s16_6,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_7,int,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_s16_8,int,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_s16_9,int,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++ ++/* Expected results for vreinterpretq_s32_xx. */ ++VECT_VAR_DECL(expected_q_s32_1,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_s32_2,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_s32_3,int,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_s32_4,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_s32_5,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_s32_6,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_s32_7,int,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_s32_8,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_s32_9,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++ ++/* Expected results for vreinterpretq_s64_xx. */ ++VECT_VAR_DECL(expected_q_s64_1,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_s64_2,int,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_s64_3,int,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_s64_4,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_s64_5,int,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_s64_6,int,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_s64_7,int,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected_q_s64_8,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_s64_9,int,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++ ++/* Expected results for vreinterpretq_u8_xx. 
*/ ++VECT_VAR_DECL(expected_q_u8_1,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_u8_2,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_u8_3,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_4,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_5,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++VECT_VAR_DECL(expected_q_u8_6,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xf2, 0xff, 0xff, 0xff, ++ 0xf3, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_7,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xf1, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_q_u8_8,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected_q_u8_9,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, ++ 0xf2, 0xff, 0xf3, 0xff, ++ 0xf4, 0xff, 0xf5, 0xff, ++ 0xf6, 0xff, 0xf7, 0xff }; ++ ++/* Expected results for vreinterpretq_u16_xx. */ ++VECT_VAR_DECL(expected_q_u16_1,uint,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_u16_2,uint,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected_q_u16_3,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_4,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_5,uint,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_u16_6,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xfff1, 0xffff, ++ 0xfff2, 0xffff, ++ 0xfff3, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_7,uint,16,8) [] = { 0xfff0, 0xffff, ++ 0xffff, 0xffff, ++ 0xfff1, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_q_u16_8,uint,16,8) [] = { 0xf1f0, 0xf3f2, ++ 0xf5f4, 0xf7f6, ++ 0xf9f8, 0xfbfa, ++ 0xfdfc, 0xfffe }; ++VECT_VAR_DECL(expected_q_u16_9,uint,16,8) [] = { 0xfff0, 0xfff1, ++ 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, ++ 0xfff6, 0xfff7 }; ++ ++/* Expected results for vreinterpretq_u32_xx. 
*/ ++VECT_VAR_DECL(expected_q_u32_1,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_u32_2,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_u32_3,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_u32_4,uint,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_u32_5,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_u32_6,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_u32_7,uint,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_u32_8,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_u32_9,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++ ++/* Expected results for vreinterpretq_u64_xx. */ ++VECT_VAR_DECL(expected_q_u64_1,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_u64_2,uint,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_u64_3,uint,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_u64_4,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected_q_u64_5,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_u64_6,uint,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++VECT_VAR_DECL(expected_q_u64_7,uint,64,2) [] = { 0xfffffff1fffffff0, ++ 0xfffffff3fffffff2 }; ++VECT_VAR_DECL(expected_q_u64_8,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, ++ 0xfffefdfcfbfaf9f8 }; ++VECT_VAR_DECL(expected_q_u64_9,uint,64,2) [] = { 0xfff3fff2fff1fff0, ++ 0xfff7fff6fff5fff4 }; ++ ++/* Expected results for vreinterpret_f32_xx. */ ++VECT_VAR_DECL(expected_f32_1,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_f32_2,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_f32_3,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_f32_4,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_f32_5,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_f32_6,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++VECT_VAR_DECL(expected_f32_7,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_f32_8,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; ++VECT_VAR_DECL(expected_f32_9,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; ++VECT_VAR_DECL(expected_f32_10,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; ++ ++/* Expected results for vreinterpretq_f32_xx. 
*/ ++VECT_VAR_DECL(expected_q_f32_1,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_f32_2,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_f32_3,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_f32_4,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_f32_5,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_f32_6,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++VECT_VAR_DECL(expected_q_f32_7,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_q_f32_8,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, ++ 0xfffffff1, 0xffffffff }; ++VECT_VAR_DECL(expected_q_f32_9,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, ++ 0xfbfaf9f8, 0xfffefdfc }; ++VECT_VAR_DECL(expected_q_f32_10,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, ++ 0xfff5fff4, 0xfff7fff6 }; ++ ++/* Expected results for vreinterpretq_xx_f32. */ ++VECT_VAR_DECL(expected_xx_f32_1,int,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1 }; ++VECT_VAR_DECL(expected_xx_f32_2,int,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; ++VECT_VAR_DECL(expected_xx_f32_3,int,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_xx_f32_4,int,64,1) [] = { 0xc1700000c1800000 }; ++VECT_VAR_DECL(expected_xx_f32_5,uint,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1 }; ++VECT_VAR_DECL(expected_xx_f32_6,uint,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; ++VECT_VAR_DECL(expected_xx_f32_7,uint,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_xx_f32_8,uint,64,1) [] = { 0xc1700000c1800000 }; ++VECT_VAR_DECL(expected_xx_f32_9,poly,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1 }; ++VECT_VAR_DECL(expected_xx_f32_10,poly,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; ++ ++/* Expected results for vreinterpretq_xx_f32. */ ++VECT_VAR_DECL(expected_q_xx_f32_1,int,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1, ++ 0x0, 0x0, 0x60, 0xc1, ++ 0x0, 0x0, 0x50, 0xc1 }; ++VECT_VAR_DECL(expected_q_xx_f32_2,int,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, ++ 0x0, 0xc160, 0x0, 0xc150 }; ++VECT_VAR_DECL(expected_q_xx_f32_3,int,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0xc1500000 }; ++VECT_VAR_DECL(expected_q_xx_f32_4,int,64,2) [] = { 0xc1700000c1800000, ++ 0xc1500000c1600000 }; ++VECT_VAR_DECL(expected_q_xx_f32_5,uint,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1, ++ 0x0, 0x0, 0x60, 0xc1, ++ 0x0, 0x0, 0x50, 0xc1 }; ++VECT_VAR_DECL(expected_q_xx_f32_6,uint,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, ++ 0x0, 0xc160, 0x0, 0xc150 }; ++VECT_VAR_DECL(expected_q_xx_f32_7,uint,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0xc1500000 }; ++VECT_VAR_DECL(expected_q_xx_f32_8,uint,64,2) [] = { 0xc1700000c1800000, ++ 0xc1500000c1600000 }; ++VECT_VAR_DECL(expected_q_xx_f32_9,poly,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, ++ 0x0, 0x0, 0x70, 0xc1, ++ 0x0, 0x0, 0x60, 0xc1, ++ 0x0, 0x0, 0x50, 0xc1 }; ++VECT_VAR_DECL(expected_q_xx_f32_10,poly,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, ++ 0x0, 0xc160, 0x0, 0xc150 }; + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++#define TEST_MSG "VREINTERPRET/VREINTERPRETQ" + -+FNNAME (INSN) ++void exec_vreinterpret (void) +{ -+ /* vector_res = vqrdmulh(vector,vector2), then store the result. 
*/ -+#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ ++ int i; ++ ++ /* Basic test: y=vreinterpret(x), then store the result. */ ++#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ + VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N)); \ ++ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) -+ -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) -+ -+#define TEST_VQRDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) -+ ++ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + -+ DECL_VARIABLE(vector, int, 16, 4); -+ DECL_VARIABLE(vector, int, 32, 2); -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); ++#define TEST_VREINTERPRET_POLY(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, int, 16, 8); -+ DECL_VARIABLE(vector_res, int, 32, 4); ++#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); \ ++ CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + -+ DECL_VARIABLE(vector2, int, 16, 4); -+ DECL_VARIABLE(vector2, int, 32, 2); -+ DECL_VARIABLE(vector2, int, 16, 8); -+ DECL_VARIABLE(vector2, int, 32, 4); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + -+ VLOAD(vector, buffer, , int, s, 16, 4); -+ VLOAD(vector, buffer, , int, s, 32, 2); -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); -+ -+ /* Initialize vector2. */ -+ VDUP(vector2, , int, s, 16, 4, 0x5555); -+ VDUP(vector2, , int, s, 32, 2, 0xBB); -+ VDUP(vector2, q, int, s, 16, 8, 0x33); -+ VDUP(vector2, q, int, s, 32, 4, 0x22); -+ -+#define CMT "" -+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat, CMT); -+ -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); -+ -+ /* Now use input values such that the multiplication causes -+ saturation. 
*/ -+#define TEST_MSG_MUL " (check mul cumulative saturation)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8000); -+ VDUP(vector2, , int, s, 32, 2, 0x80000000); -+ VDUP(vector2, q, int, s, 16, 8, 0x8000); -+ VDUP(vector2, q, int, s, 32, 4, 0x80000000); + -+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); -+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL); -+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL); -+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++ /* vreinterpret_s8_xx. */ ++ TEST_VREINTERPRET(, int, s, 8, 8, int, s, 16, 4, expected_s8_1); ++ TEST_VREINTERPRET(, int, s, 8, 8, int, s, 32, 2, expected_s8_2); ++ TEST_VREINTERPRET(, int, s, 8, 8, int, s, 64, 1, expected_s8_3); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 8, 8, expected_s8_4); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 16, 4, expected_s8_5); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 32, 2, expected_s8_6); ++ TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 64, 1, expected_s8_7); ++ TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 8, 8, expected_s8_8); ++ TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 16, 4, expected_s8_9); ++ ++ /* vreinterpret_s16_xx. */ ++ TEST_VREINTERPRET(, int, s, 16, 4, int, s, 8, 8, expected_s16_1); ++ TEST_VREINTERPRET(, int, s, 16, 4, int, s, 32, 2, expected_s16_2); ++ TEST_VREINTERPRET(, int, s, 16, 4, int, s, 64, 1, expected_s16_3); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 8, 8, expected_s16_4); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 16, 4, expected_s16_5); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 32, 2, expected_s16_6); ++ TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 64, 1, expected_s16_7); ++ TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 8, 8, expected_s16_8); ++ TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 16, 4, expected_s16_9); ++ ++ /* vreinterpret_s32_xx. */ ++ TEST_VREINTERPRET(, int, s, 32, 2, int, s, 8, 8, expected_s32_1); ++ TEST_VREINTERPRET(, int, s, 32, 2, int, s, 16, 4, expected_s32_2); ++ TEST_VREINTERPRET(, int, s, 32, 2, int, s, 64, 1, expected_s32_3); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 8, 8, expected_s32_4); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 16, 4, expected_s32_5); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 32, 2, expected_s32_6); ++ TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 64, 1, expected_s32_7); ++ TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 8, 8, expected_s32_8); ++ TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 16, 4, expected_s32_9); ++ ++ /* vreinterpret_s64_xx. 
*/ ++ TEST_VREINTERPRET(, int, s, 64, 1, int, s, 8, 8, expected_s64_1); ++ TEST_VREINTERPRET(, int, s, 64, 1, int, s, 16, 4, expected_s64_2); ++ TEST_VREINTERPRET(, int, s, 64, 1, int, s, 32, 2, expected_s64_3); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 8, 8, expected_s64_4); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 16, 4, expected_s64_5); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 32, 2, expected_s64_6); ++ TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 64, 1, expected_s64_7); ++ TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 8, 8, expected_s64_8); ++ TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 16, 4, expected_s64_9); ++ ++ /* vreinterpret_u8_xx. */ ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 8, 8, expected_u8_1); ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 16, 4, expected_u8_2); ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 32, 2, expected_u8_3); ++ TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 64, 1, expected_u8_4); ++ TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 16, 4, expected_u8_5); ++ TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 32, 2, expected_u8_6); ++ TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 64, 1, expected_u8_7); ++ TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 8, 8, expected_u8_8); ++ TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 16, 4, expected_u8_9); ++ ++ /* vreinterpret_u16_xx. */ ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 8, 8, expected_u16_1); ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 16, 4, expected_u16_2); ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 32, 2, expected_u16_3); ++ TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 64, 1, expected_u16_4); ++ TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 8, 8, expected_u16_5); ++ TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 32, 2, expected_u16_6); ++ TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 64, 1, expected_u16_7); ++ TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 8, 8, expected_u16_8); ++ TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 16, 4, expected_u16_9); ++ ++ /* vreinterpret_u32_xx. */ ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 8, 8, expected_u32_1); ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 16, 4, expected_u32_2); ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 32, 2, expected_u32_3); ++ TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 64, 1, expected_u32_4); ++ TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 8, 8, expected_u32_5); ++ TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 16, 4, expected_u32_6); ++ TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 64, 1, expected_u32_7); ++ TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 8, 8, expected_u32_8); ++ TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 16, 4, expected_u32_9); ++ ++ /* vreinterpret_u64_xx. */ ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 8, 8, expected_u64_1); ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 16, 4, expected_u64_2); ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 32, 2, expected_u64_3); ++ TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 64, 1, expected_u64_4); ++ TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 8, 8, expected_u64_5); ++ TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 16, 4, expected_u64_6); ++ TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 32, 2, expected_u64_7); ++ TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 8, 8, expected_u64_8); ++ TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 16, 4, expected_u64_9); ++ ++ /* vreinterpret_p8_xx. 
*/ ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 8, 8, expected_p8_1); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 16, 4, expected_p8_2); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 32, 2, expected_p8_3); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 64, 1, expected_p8_4); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 8, 8, expected_p8_5); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 16, 4, expected_p8_6); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 32, 2, expected_p8_7); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 64, 1, expected_p8_8); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, poly, p, 16, 4, expected_p8_9); ++ ++ /* vreinterpret_p16_xx. */ ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 8, 8, expected_p16_1); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 16, 4, expected_p16_2); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 32, 2, expected_p16_3); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 64, 1, expected_p16_4); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 8, 8, expected_p16_5); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 16, 4, expected_p16_6); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 32, 2, expected_p16_7); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 64, 1, expected_p16_8); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, poly, p, 8, 8, expected_p16_9); ++ ++ /* vreinterpretq_s8_xx. */ ++ TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 16, 8, expected_q_s8_1); ++ TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 32, 4, expected_q_s8_2); ++ TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 64, 2, expected_q_s8_3); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 8, 16, expected_q_s8_4); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 16, 8, expected_q_s8_5); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 32, 4, expected_q_s8_6); ++ TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 64, 2, expected_q_s8_7); ++ TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 8, 16, expected_q_s8_8); ++ TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 16, 8, expected_q_s8_9); ++ ++ /* vreinterpretq_s16_xx. */ ++ TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 8, 16, expected_q_s16_1); ++ TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 32, 4, expected_q_s16_2); ++ TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 64, 2, expected_q_s16_3); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 8, 16, expected_q_s16_4); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 16, 8, expected_q_s16_5); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 32, 4, expected_q_s16_6); ++ TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 64, 2, expected_q_s16_7); ++ TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 8, 16, expected_q_s16_8); ++ TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 16, 8, expected_q_s16_9); ++ ++ /* vreinterpretq_s32_xx. */ ++ TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 8, 16, expected_q_s32_1); ++ TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 16, 8, expected_q_s32_2); ++ TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 64, 2, expected_q_s32_3); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 8, 16, expected_q_s32_4); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 16, 8, expected_q_s32_5); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 32, 4, expected_q_s32_6); ++ TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 64, 2, expected_q_s32_7); ++ TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 8, 16, expected_q_s32_8); ++ TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 16, 8, expected_q_s32_9); ++ ++ /* vreinterpretq_s64_xx. 
*/ ++ TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 8, 16, expected_q_s64_1); ++ TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 16, 8, expected_q_s64_2); ++ TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 32, 4, expected_q_s64_3); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 8, 16, expected_q_s64_4); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 16, 8, expected_q_s64_5); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 32, 4, expected_q_s64_6); ++ TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 64, 2, expected_q_s64_7); ++ TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 8, 16, expected_q_s64_8); ++ TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 16, 8, expected_q_s64_9); ++ ++ /* vreinterpretq_u8_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 8, 16, expected_q_u8_1); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 16, 8, expected_q_u8_2); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 32, 4, expected_q_u8_3); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 64, 2, expected_q_u8_4); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 16, 8, expected_q_u8_5); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 32, 4, expected_q_u8_6); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 64, 2, expected_q_u8_7); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 8, 16, expected_q_u8_8); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 16, 8, expected_q_u8_9); ++ ++ /* vreinterpretq_u16_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 8, 16, expected_q_u16_1); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 16, 8, expected_q_u16_2); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 32, 4, expected_q_u16_3); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 64, 2, expected_q_u16_4); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 8, 16, expected_q_u16_5); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 32, 4, expected_q_u16_6); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 64, 2, expected_q_u16_7); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 8, 16, expected_q_u16_8); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 16, 8, expected_q_u16_9); ++ ++ /* vreinterpretq_u32_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 8, 16, expected_q_u32_1); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 16, 8, expected_q_u32_2); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 32, 4, expected_q_u32_3); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 64, 2, expected_q_u32_4); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 8, 16, expected_q_u32_5); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 16, 8, expected_q_u32_6); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 64, 2, expected_q_u32_7); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 8, 16, expected_q_u32_8); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 16, 8, expected_q_u32_9); ++ ++ /* vreinterpretq_u64_xx. */ ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 8, 16, expected_q_u64_1); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 16, 8, expected_q_u64_2); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 32, 4, expected_q_u64_3); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 64, 2, expected_q_u64_4); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 8, 16, expected_q_u64_5); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 16, 8, expected_q_u64_6); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 32, 4, expected_q_u64_7); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 8, 16, expected_q_u64_8); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 16, 8, expected_q_u64_9); ++ ++ /* vreinterpret_f32_xx. 
*/ ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 8, 8, expected_f32_1); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 16, 4, expected_f32_2); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 32, 2, expected_f32_3); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 64, 1, expected_f32_4); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 8, 8, expected_f32_5); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 16, 4, expected_f32_6); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 32, 2, expected_f32_7); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 64, 1, expected_f32_8); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 8, 8, expected_f32_9); ++ TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 16, 4, expected_f32_10); ++ ++ /* vreinterpretq_f32_xx. */ ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 8, 16, expected_q_f32_1); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 16, 8, expected_q_f32_2); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 32, 4, expected_q_f32_3); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 64, 2, expected_q_f32_4); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 8, 16, expected_q_f32_5); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 16, 8, expected_q_f32_6); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 32, 4, expected_q_f32_7); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 64, 2, expected_q_f32_8); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 8, 16, expected_q_f32_9); ++ TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 16, 8, expected_q_f32_10); ++ ++ /* vreinterpret_xx_f32. */ ++ TEST_VREINTERPRET(, int, s, 8, 8, float, f, 32, 2, expected_xx_f32_1); ++ TEST_VREINTERPRET(, int, s, 16, 4, float, f, 32, 2, expected_xx_f32_2); ++ TEST_VREINTERPRET(, int, s, 32, 2, float, f, 32, 2, expected_xx_f32_3); ++ TEST_VREINTERPRET(, int, s, 64, 1, float, f, 32, 2, expected_xx_f32_4); ++ TEST_VREINTERPRET(, uint, u, 8, 8, float, f, 32, 2, expected_xx_f32_5); ++ TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 32, 2, expected_xx_f32_6); ++ TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 32, 2, expected_xx_f32_7); ++ TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 32, 2, expected_xx_f32_8); ++ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 32, 2, expected_xx_f32_9); ++ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 32, 2, expected_xx_f32_10); ++ ++ /* vreinterpretq_xx_f32. */ ++ TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 32, 4, expected_q_xx_f32_1); ++ TEST_VREINTERPRET(q, int, s, 16, 8, float, f, 32, 4, expected_q_xx_f32_2); ++ TEST_VREINTERPRET(q, int, s, 32, 4, float, f, 32, 4, expected_q_xx_f32_3); ++ TEST_VREINTERPRET(q, int, s, 64, 2, float, f, 32, 4, expected_q_xx_f32_4); ++ TEST_VREINTERPRET(q, uint, u, 8, 16, float, f, 32, 4, expected_q_xx_f32_5); ++ TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 32, 4, expected_q_xx_f32_6); ++ TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 32, 4, expected_q_xx_f32_7); ++ TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 32, 4, expected_q_xx_f32_8); ++ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 32, 4, expected_q_xx_f32_9); ++ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 32, 4, expected_q_xx_f32_10); ++} + -+ /* Use input values where rounding produces a result equal to the -+ saturation value, but does not set the saturation flag. 
*/ -+#define TEST_MSG_ROUND " (check rounding)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8001); -+ VDUP(vector2, , int, s, 32, 2, 0x80000001); -+ VDUP(vector2, q, int, s, 16, 8, 0x8001); -+ VDUP(vector2, q, int, s, 32, 4, 0x80000001); ++int main (void) ++{ ++ exec_vreinterpret (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c +@@ -0,0 +1,200 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); -+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_round, TEST_MSG_ROUND); -+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_round, TEST_MSG_ROUND); -+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_round, TEST_MSG_ROUND); ++/* Expected results for vrev16. */ ++VECT_VAR_DECL(expected_vrev16,int,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6 }; ++VECT_VAR_DECL(expected_vrev16,uint,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6 }; ++VECT_VAR_DECL(expected_vrev16,poly,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6 }; ++VECT_VAR_DECL(expected_vrev16,int,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6, ++ 0xf9, 0xf8, 0xfb, 0xfa, ++ 0xfd, 0xfc, 0xff, 0xfe }; ++VECT_VAR_DECL(expected_vrev16,uint,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6, ++ 0xf9, 0xf8, 0xfb, 0xfa, ++ 0xfd, 0xfc, 0xff, 0xfe }; ++VECT_VAR_DECL(expected_vrev16,poly,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, ++ 0xf5, 0xf4, 0xf7, 0xf6, ++ 0xf9, 0xf8, 0xfb, 0xfa, ++ 0xfd, 0xfc, 0xff, 0xfe }; ++ ++/* Expected results for vrev32. */ ++VECT_VAR_DECL(expected_vrev32,int,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4 }; ++VECT_VAR_DECL(expected_vrev32,int,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; ++VECT_VAR_DECL(expected_vrev32,uint,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4 }; ++VECT_VAR_DECL(expected_vrev32,uint,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; ++VECT_VAR_DECL(expected_vrev32,poly,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4 }; ++VECT_VAR_DECL(expected_vrev32,poly,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; ++VECT_VAR_DECL(expected_vrev32,int,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xfb, 0xfa, 0xf9, 0xf8, ++ 0xff, 0xfe, 0xfd, 0xfc }; ++VECT_VAR_DECL(expected_vrev32,int,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, ++ 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; ++VECT_VAR_DECL(expected_vrev32,uint,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xfb, 0xfa, 0xf9, 0xf8, ++ 0xff, 0xfe, 0xfd, 0xfc }; ++VECT_VAR_DECL(expected_vrev32,uint,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, ++ 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; ++VECT_VAR_DECL(expected_vrev32,poly,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xfb, 0xfa, 0xf9, 0xf8, ++ 0xff, 0xfe, 0xfd, 0xfc }; ++VECT_VAR_DECL(expected_vrev32,poly,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, ++ 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; ++ ++/* Expected results for vrev64. 
*/ ++VECT_VAR_DECL(expected_vrev64,int,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0 }; ++VECT_VAR_DECL(expected_vrev64,int,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; ++VECT_VAR_DECL(expected_vrev64,int,32,2) [] = { 0xfffffff1, 0xfffffff0 }; ++VECT_VAR_DECL(expected_vrev64,uint,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, ++ 0xf2, 0xf1, 0xf0 }; ++VECT_VAR_DECL(expected_vrev64,uint,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; ++VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; ++VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0 }; ++VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; ++VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; ++VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xff, 0xfe, 0xfd, 0xfc, ++ 0xfb, 0xfa, 0xf9, 0xf8 }; ++VECT_VAR_DECL(expected_vrev64,int,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, ++ 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; ++VECT_VAR_DECL(expected_vrev64,int,32,4) [] = { 0xfffffff1, 0xfffffff0, ++ 0xfffffff3, 0xfffffff2 }; ++VECT_VAR_DECL(expected_vrev64,uint,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xff, 0xfe, 0xfd, 0xfc, ++ 0xfb, 0xfa, 0xf9, 0xf8 }; ++VECT_VAR_DECL(expected_vrev64,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, ++ 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; ++VECT_VAR_DECL(expected_vrev64,uint,32,4) [] = { 0xfffffff1, 0xfffffff0, ++ 0xfffffff3, 0xfffffff2 }; ++VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, ++ 0xf3, 0xf2, 0xf1, 0xf0, ++ 0xff, 0xfe, 0xfd, 0xfc, ++ 0xfb, 0xfa, 0xf9, 0xf8 }; ++VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, ++ 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; ++VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, ++ 0xc1500000, 0xc1600000 }; + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++void exec_vrev (void) ++{ ++ /* Basic test: y=vrev(x), then store the result. */ ++#define TEST_VREV(Q, T1, T2, W, N, W2) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrev##W2##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ clean_results (); ++ ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); ++ ++ /* Check vrev in each of the existing combinations. 
*/ ++#define TEST_MSG "VREV16" ++ TEST_VREV(, int, s, 8, 8, 16); ++ TEST_VREV(, uint, u, 8, 8, 16); ++ TEST_VREV(, poly, p, 8, 8, 16); ++ TEST_VREV(q, int, s, 8, 16, 16); ++ TEST_VREV(q, uint, u, 8, 16, 16); ++ TEST_VREV(q, poly, p, 8, 16, 16); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev16, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev16, ""); ++ ++#undef TEST_MSG ++#define TEST_MSG "VREV32" ++ TEST_VREV(, int, s, 8, 8, 32); ++ TEST_VREV(, int, s, 16, 4, 32); ++ TEST_VREV(, uint, u, 8, 8, 32); ++ TEST_VREV(, uint, u, 16, 4, 32); ++ TEST_VREV(, poly, p, 8, 8, 32); ++ TEST_VREV(, poly, p, 16, 4, 32); ++ TEST_VREV(q, int, s, 8, 16, 32); ++ TEST_VREV(q, int, s, 16, 8, 32); ++ TEST_VREV(q, uint, u, 8, 16, 32); ++ TEST_VREV(q, uint, u, 16, 8, 32); ++ TEST_VREV(q, poly, p, 8, 16, 32); ++ TEST_VREV(q, poly, p, 16, 8, 32); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev32, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev32, ""); ++ ++#undef TEST_MSG ++#define TEST_MSG "VREV64" ++ TEST_VREV(, int, s, 8, 8, 64); ++ TEST_VREV(, int, s, 16, 4, 64); ++ TEST_VREV(, int, s, 32, 2, 64); ++ TEST_VREV(, uint, u, 8, 8, 64); ++ TEST_VREV(, uint, u, 16, 4, 64); ++ TEST_VREV(, uint, u, 32, 2, 64); ++ TEST_VREV(, poly, p, 8, 8, 64); ++ TEST_VREV(, poly, p, 16, 4, 64); ++ TEST_VREV(q, int, s, 8, 16, 64); ++ TEST_VREV(q, int, s, 16, 8, 64); ++ TEST_VREV(q, int, s, 32, 4, 64); ++ TEST_VREV(q, uint, u, 8, 16, 64); ++ TEST_VREV(q, uint, u, 16, 8, 64); ++ TEST_VREV(q, uint, u, 32, 4, 64); ++ TEST_VREV(q, poly, p, 8, 16, 64); ++ TEST_VREV(q, poly, p, 16, 8, 64); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_vrev64, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, ""); ++ ++ TEST_VREV(, float, f, 32, 2, 64); ++ TEST_VREV(q, float, f, 32, 4, 64); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, 
expected_vrev64, ""); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_vrev64, ""); +} + +int main (void) +{ -+ exec_vqrdmulh (); ++ exec_vrev (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_lane.c -@@ -0,0 +1,169 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrshl.c +@@ -0,0 +1,627 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++ ++/* Expected results with input=0 and negative shift amount. */ ++VECT_VAR_DECL(expected_0_sh_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0_sh_neg,uint,64,2) [] = { 0x0, 0x0 }; + +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, ++ 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, ++ 0x4000, 0x5000, 0x6000, 0x7000 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; + -+/* Expected values of cumulative_saturation flag when multiplication -+ saturates. */ -+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++/* Expected results with negative shift amount. */ ++VECT_VAR_DECL(expected_sh_neg,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected_sh_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; ++VECT_VAR_DECL(expected_sh_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected_sh_neg,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected_sh_neg,uint,8,8) [] = { 0x78, 0x79, 0x79, 0x7a, ++ 0x7a, 0x7b, 0x7b, 0x7c }; ++VECT_VAR_DECL(expected_sh_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; ++VECT_VAR_DECL(expected_sh_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; ++VECT_VAR_DECL(expected_sh_neg,uint,64,1) [] = { 0xfffffffffffffff }; ++VECT_VAR_DECL(expected_sh_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2, ++ 0x2, 0x2, 0x2, 0x2 }; ++VECT_VAR_DECL(expected_sh_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_sh_neg,uint,32,4) [] = { 0x80000, 0x80000, ++ 0x80000, 0x80000 }; ++VECT_VAR_DECL(expected_sh_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; ++ ++/* Expected results with max input value shifted by -1 to test ++ round_const. 
*/ ++VECT_VAR_DECL(expected_max_sh_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,16,4) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,16,8) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_minus1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++ ++/* Expected results with max input value shifted by -3 to test ++ round_const. 
*/ ++VECT_VAR_DECL(expected_max_sh_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,16,4) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,16,8) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_minus3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected results with negative shift by vector width. */ ++VECT_VAR_DECL(expected_max_sh_minus_width,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_minus_width,uint,64,2) [] = { 0x1, 0x1 }; + -+/* Expected results when multiplication saturates. 
*/ -+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++/* Expected results with large shift amount. */ ++VECT_VAR_DECL(expected_max_sh_large,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large,uint,64,2) [] = { 0x0, 0x0 }; + -+/* Expected values of cumulative_saturation flag when rounding -+ should not cause saturation. */ -+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++/* Expected results with large negative shift amount. 
*/ ++VECT_VAR_DECL(expected_max_sh_large_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_large_neg,uint,64,2) [] = { 0x1, 0x1 }; + -+/* Expected results when rounding should not cause saturation. */ -+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++#define TEST_MSG "VRSHL/VRSHLQ" ++void exec_vrshl (void) ++{ ++ /* Basic test: v3=vrshl(v1,v2), then store the result. */ ++#define TEST_VRSHL(T3, Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector_shift, T3, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+#define INSN vqrdmulh -+#define TEST_MSG "VQRDMULH_LANE" ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + -+FNNAME (INSN) -+{ -+ /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ -+#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N2), \ -+ L); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ clean_results (); + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Fill input vector with 0, to check behavior on limits. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); + -+#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Choose init value arbitrarily, will be used as shift amount. */ ++ /* Use values equal to one-less-than the type width to check ++ behaviour on limits. */ ++ VDUP(vector_shift, , int, s, 8, 8, 7); ++ VDUP(vector_shift, , int, s, 16, 4, 15); ++ VDUP(vector_shift, , int, s, 32, 2, 31); ++ VDUP(vector_shift, , int, s, 64, 1, 63); ++ VDUP(vector_shift, q, int, s, 8, 16, 7); ++ VDUP(vector_shift, q, int, s, 16, 8, 15); ++ VDUP(vector_shift, q, int, s, 32, 4, 31); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); ++ ++#define CMT " (with input = 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + ++#undef CMT ++#define CMT " (input 0 and negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_sh_neg, CMT); ++ ++ ++ /* Test again, with predefined input values. */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose init value arbitrarily, will be used as shift amount. */ ++ VDUP(vector_shift, , int, s, 8, 8, 1); ++ VDUP(vector_shift, , int, s, 16, 4, 3); ++ VDUP(vector_shift, , int, s, 32, 2, 8); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, 10); ++ VDUP(vector_shift, q, int, s, 16, 8, 12); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); ++ ++#undef CMT ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Use negative shift amounts. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -2); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -4); ++ VDUP(vector_shift, q, int, s, 8, 16, -7); ++ VDUP(vector_shift, q, int, s, 16, 8, -11); ++ VDUP(vector_shift, q, int, s, 32, 4, -13); ++ VDUP(vector_shift, q, int, s, 64, 2, -20); ++ ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); ++ ++#undef CMT ++#define CMT " (negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh_neg, CMT); ++ ++ /* Fill input vector with max value, to check behavior on limits. */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + -+ DECL_VARIABLE(vector, int, 16, 4); -+ DECL_VARIABLE(vector, int, 32, 2); -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); ++ /* Use -1 shift amount to check overflow with round_const. */ ++ VDUP(vector_shift, , int, s, 8, 8, -1); ++ VDUP(vector_shift, , int, s, 16, 4, -1); ++ VDUP(vector_shift, , int, s, 32, 2, -1); ++ VDUP(vector_shift, , int, s, 64, 1, -1); ++ VDUP(vector_shift, q, int, s, 8, 16, -1); ++ VDUP(vector_shift, q, int, s, 16, 8, -1); ++ VDUP(vector_shift, q, int, s, 32, 4, -1); ++ VDUP(vector_shift, q, int, s, 64, 2, -1); + -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, int, 16, 8); -+ DECL_VARIABLE(vector_res, int, 32, 4); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ /* vector2: vqrdmulh_lane and vqrdmulhq_lane have a 2nd argument with -+ the same number of elements, so we need only one variable of each -+ type. 
*/ -+ DECL_VARIABLE(vector2, int, 16, 4); -+ DECL_VARIABLE(vector2, int, 32, 2); ++#undef CMT ++#define CMT " (max input, shift by -1)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_minus1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_minus1, CMT); + -+ clean_results (); ++ /* Use -3 shift amount to check overflow with round_const. */ ++ VDUP(vector_shift, , int, s, 8, 8, -3); ++ VDUP(vector_shift, , int, s, 16, 4, -3); ++ VDUP(vector_shift, , int, s, 32, 2, -3); ++ VDUP(vector_shift, , int, s, 64, 1, -3); ++ VDUP(vector_shift, q, int, s, 8, 16, -3); ++ VDUP(vector_shift, q, int, s, 16, 8, -3); ++ VDUP(vector_shift, q, int, s, 32, 4, -3); ++ VDUP(vector_shift, q, int, s, 64, 2, -3); + -+ VLOAD(vector, buffer, , int, s, 16, 4); -+ VLOAD(vector, buffer, , int, s, 32, 2); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); ++#undef CMT ++#define CMT " (check rounding constant: max input, shift by -3)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_minus3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_minus3, CMT); ++ ++ ++ /* Use negative shift amount as large as input vector width. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, -8); ++ VDUP(vector_shift, , int, s, 16, 4, -16); ++ VDUP(vector_shift, , int, s, 32, 2, -32); ++ VDUP(vector_shift, , int, s, 64, 1, -64); ++ VDUP(vector_shift, q, int, s, 8, 16, -8); ++ VDUP(vector_shift, q, int, s, 16, 8, -16); ++ VDUP(vector_shift, q, int, s, 32, 4, -32); ++ VDUP(vector_shift, q, int, s, 64, 2, -64); + -+ /* Initialize vector2. */ -+ VDUP(vector2, , int, s, 16, 4, 0x55); -+ VDUP(vector2, , int, s, 32, 2, 0xBB); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ /* Choose lane arbitrarily. */ -+#define CMT "" -+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT); ++#undef CMT ++#define CMT " (max input, right shift by vector width)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_minus_width, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_minus_width, CMT); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + -+ /* Now use input values such that the multiplication causes -+ saturation. */ -+#define TEST_MSG_MUL " (check mul cumulative saturation)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8000); -+ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ /* Test large shift amount. 
*/ ++ VDUP(vector_shift, , int, s, 8, 8, 10); ++ VDUP(vector_shift, , int, s, 16, 4, 20); ++ VDUP(vector_shift, , int, s, 32, 2, 33); ++ VDUP(vector_shift, , int, s, 64, 1, 65); ++ VDUP(vector_shift, q, int, s, 8, 16, 9); ++ VDUP(vector_shift, q, int, s, 16, 8, 16); ++ VDUP(vector_shift, q, int, s, 32, 4, 32); ++ VDUP(vector_shift, q, int, s, 64, 2, 64); + -+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); ++#undef CMT ++#define CMT " (max input, large shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_large, CMT); + -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector2, , int, s, 16, 4, 0x8001); -+ VDUP(vector2, , int, s, 32, 2, 0x80000001); ++ ++ /* Test large negative shift amount. */ ++ VDUP(vector_shift, , int, s, 8, 8, -10); ++ VDUP(vector_shift, , int, s, 16, 4, -20); ++ VDUP(vector_shift, , int, s, 32, 2, -33); ++ VDUP(vector_shift, , int, s, 64, 1, -65); ++ VDUP(vector_shift, q, int, s, 8, 16, -9); ++ VDUP(vector_shift, q, int, s, 16, 8, -16); ++ VDUP(vector_shift, q, int, s, 32, 4, -32); ++ VDUP(vector_shift, q, int, s, 64, 2, -64); + -+ /* Use input values where rounding produces a result equal to the -+ saturation value, but does not set the saturation flag. 
*/ -+#define TEST_MSG_ROUND " (check rounding)" -+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); ++ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++#undef CMT ++#define CMT " (max input, large negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_large_neg, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_large_neg, CMT); +} + +int main (void) +{ -+ exec_vqrdmulh_lane (); ++ exec_vrshl (); + return 0; +} -+ --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_n.c -@@ -0,0 +1,155 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrshr_n.c +@@ -0,0 +1,504 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+ +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5, -+ 0x5, 0x4, 0x4, 0x4 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, -+ 0xfffffffe, 0xfffffffe }; ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, ++ 0x3d, 0x3d, 0x3e, 0x3e }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x8000000, 0x8000000 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x80000000 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc, ++ 0xfc, 0xfd, 0xfd, 0xfe, ++ 0xfe, 0xff, 0xff, 0x0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, ++ 0xfffffffd, 0xfffffffd }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3d, 0x3d, ++ 0x3d, 0x3d, 0x3e, 0x3e, ++ 0x3e, 0x3e, 0x3f, 0x3f, ++ 0x3f, 0x3f, 0x40, 0x40 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, ++ 0x1fff, 0x1fff, 0x1fff, 0x1fff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x8000000, 0x8000000, ++ 0x8000000, 0x8000000 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x80000000, 0x80000000 }; + -+/* Expected values of cumulative_saturation flag when multiplication -+ saturates. */ -+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1; ++/* Expected results with maximum input and max shift amount. */ ++VECT_VAR_DECL(expected_max_sh_max,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_sh_max,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with maximum input and shift by 1. 
*/ ++VECT_VAR_DECL(expected_max_sh_1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_1,int,16,4) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh_1,int,16,8) [] = { 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000, ++ 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh_1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh_1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; + -+/* Expected results when multiplication saturates. */ -+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++/* Expected results with maximum input and shift by 3. 
*/ ++VECT_VAR_DECL(expected_max_sh_3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_3,int,16,4) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh_3,int,16,8) [] = { 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000, ++ 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh_3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh_3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected results with max negative input (for signed types, shift ++ by 1. */ ++VECT_VAR_DECL(expected_max_neg_sh_1,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,16,4) [] = { 0xc000, 0xc000, ++ 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,32,2) [] = { 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,64,1) [] = { 0xc000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,16,8) [] = { 0xc000, 0xc000, ++ 0xc000, 0xc000, ++ 0xc000, 0xc000, ++ 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,32,4) [] = { 0xc0000000, 0xc0000000, ++ 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,int,64,2) [] = { 0xc000000000000000, ++ 0xc000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; ++ ++/* Expected results with max negative input (for signed types, shift ++ by 3. 
*/ ++VECT_VAR_DECL(expected_max_neg_sh_3,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,16,4) [] = { 0xf000, 0xf000, ++ 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,32,2) [] = { 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,64,1) [] = { 0xf000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,16,8) [] = { 0xf000, 0xf000, ++ 0xf000, 0xf000, ++ 0xf000, 0xf000, ++ 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,32,4) [] = { 0xf0000000, 0xf0000000, ++ 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,int,64,2) [] = { 0xf000000000000000, ++ 0xf000000000000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_neg_sh_3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; + -+/* Expected values of cumulative_saturation flag when rounding -+ should not cause saturation. */ -+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0; ++#define TEST_MSG "VRSHR_N" ++void exec_vrshr_n (void) ++{ ++ /* Basic test: y=vrshr_n(x,v), then store the result. */ ++#define TEST_VRSHR_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+/* Expected results when rounding should not cause saturation. */ -+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+#define INSN vqrdmulh -+#define TEST_MSG "VQRDMULH_N" ++ clean_results (); + -+#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+FNNAME (INSN) -+{ -+ int i; ++ /* Choose shift amount arbitrarily. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 1); ++ TEST_VRSHR_N(, int, s, 16, 4, 12); ++ TEST_VRSHR_N(, int, s, 32, 2, 2); ++ TEST_VRSHR_N(, int, s, 64, 1, 32); ++ TEST_VRSHR_N(, uint, u, 8, 8, 2); ++ TEST_VRSHR_N(, uint, u, 16, 4, 3); ++ TEST_VRSHR_N(, uint, u, 32, 2, 5); ++ TEST_VRSHR_N(, uint, u, 64, 1, 33); ++ ++ TEST_VRSHR_N(q, int, s, 8, 16, 1); ++ TEST_VRSHR_N(q, int, s, 16, 8, 12); ++ TEST_VRSHR_N(q, int, s, 32, 4, 2); ++ TEST_VRSHR_N(q, int, s, 64, 2, 32); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 2); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 5); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 33); + -+ /* vector_res = vqrdmulh_n(vector,val), then store the result. */ -+#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ L); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ ++ ++ /* Use maximum positive input value. */ ++ VDUP(vector, , int, s, 8, 8, 0x7F); ++ VDUP(vector, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x7F); ++ VDUP(vector, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ /* Use max shift amount, to exercise saturation. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 8); ++ TEST_VRSHR_N(, int, s, 16, 4, 16); ++ TEST_VRSHR_N(, int, s, 32, 2, 32); ++ TEST_VRSHR_N(, int, s, 64, 1, 64); ++ TEST_VRSHR_N(, uint, u, 8, 8, 8); ++ TEST_VRSHR_N(, uint, u, 16, 4, 16); ++ TEST_VRSHR_N(, uint, u, 32, 2, 32); ++ TEST_VRSHR_N(, uint, u, 64, 1, 64); ++ TEST_VRSHR_N(q, int, s, 8, 16, 8); ++ TEST_VRSHR_N(q, int, s, 16, 8, 16); ++ TEST_VRSHR_N(q, int, s, 32, 4, 32); ++ TEST_VRSHR_N(q, int, s, 64, 2, 64); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 8); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 16); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 32); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 64); ++ ++#undef CMT ++#define CMT " (overflow test: max shift amount, max positive input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_max, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_max, CMT); ++ ++ ++ /* Use 1 as shift amount, to exercise saturation. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 1); ++ TEST_VRSHR_N(, int, s, 16, 4, 1); ++ TEST_VRSHR_N(, int, s, 32, 2, 1); ++ TEST_VRSHR_N(, int, s, 64, 1, 1); ++ TEST_VRSHR_N(, uint, u, 8, 8, 1); ++ TEST_VRSHR_N(, uint, u, 16, 4, 1); ++ TEST_VRSHR_N(, uint, u, 32, 2, 1); ++ TEST_VRSHR_N(, uint, u, 64, 1, 1); ++ TEST_VRSHR_N(q, int, s, 8, 16, 1); ++ TEST_VRSHR_N(q, int, s, 16, 8, 1); ++ TEST_VRSHR_N(q, int, s, 32, 4, 1); ++ TEST_VRSHR_N(q, int, s, 64, 2, 1); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 1); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 1); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 1); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 1); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 1, with max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_1, CMT); ++ ++ ++ /* Use 3 as shift amount, to exercise saturation. */ ++ TEST_VRSHR_N(, int, s, 8, 8, 3); ++ TEST_VRSHR_N(, int, s, 16, 4, 3); ++ TEST_VRSHR_N(, int, s, 32, 2, 3); ++ TEST_VRSHR_N(, int, s, 64, 1, 3); ++ TEST_VRSHR_N(, uint, u, 8, 8, 3); ++ TEST_VRSHR_N(, uint, u, 16, 4, 3); ++ TEST_VRSHR_N(, uint, u, 32, 2, 3); ++ TEST_VRSHR_N(, uint, u, 64, 1, 3); ++ TEST_VRSHR_N(q, int, s, 8, 16, 3); ++ TEST_VRSHR_N(q, int, s, 16, 8, 3); ++ TEST_VRSHR_N(q, int, s, 32, 4, 3); ++ TEST_VRSHR_N(q, int, s, 64, 2, 3); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 3); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 3); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 3); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 3, with max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh_3, CMT); ++ ++ ++ /* Use minimum negative input for signed types. 
*/ ++ VDUP(vector, , int, s, 8, 8, 0x80); ++ VDUP(vector, , int, s, 16, 4, 0x8000); ++ VDUP(vector, , int, s, 32, 2, 0x80000000); ++ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); ++ VDUP(vector, , uint, u, 8, 8, 0xFF); ++ VDUP(vector, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector, q, int, s, 8, 16, 0x80); ++ VDUP(vector, q, int, s, 16, 8, 0x8000); ++ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ VDUP(vector, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ ++ /* Use 1 as shift amount, to exercise saturation code. */ ++ TEST_VRSHR_N(, int, s, 8, 8, 1); ++ TEST_VRSHR_N(, int, s, 16, 4, 1); ++ TEST_VRSHR_N(, int, s, 32, 2, 1); ++ TEST_VRSHR_N(, int, s, 64, 1, 1); ++ TEST_VRSHR_N(, uint, u, 8, 8, 1); ++ TEST_VRSHR_N(, uint, u, 16, 4, 1); ++ TEST_VRSHR_N(, uint, u, 32, 2, 1); ++ TEST_VRSHR_N(, uint, u, 64, 1, 1); ++ TEST_VRSHR_N(q, int, s, 8, 16, 1); ++ TEST_VRSHR_N(q, int, s, 16, 8, 1); ++ TEST_VRSHR_N(q, int, s, 32, 4, 1); ++ TEST_VRSHR_N(q, int, s, 64, 2, 1); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 1); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 1); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 1); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 1); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 1, with negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_neg_sh_1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_neg_sh_1, CMT); ++ ++ ++ /* Use 3 as shift amount, to exercise saturation code. 
*/ ++ TEST_VRSHR_N(, int, s, 8, 8, 3); ++ TEST_VRSHR_N(, int, s, 16, 4, 3); ++ TEST_VRSHR_N(, int, s, 32, 2, 3); ++ TEST_VRSHR_N(, int, s, 64, 1, 3); ++ TEST_VRSHR_N(, uint, u, 8, 8, 3); ++ TEST_VRSHR_N(, uint, u, 16, 4, 3); ++ TEST_VRSHR_N(, uint, u, 32, 2, 3); ++ TEST_VRSHR_N(, uint, u, 64, 1, 3); ++ TEST_VRSHR_N(q, int, s, 8, 16, 3); ++ TEST_VRSHR_N(q, int, s, 16, 8, 3); ++ TEST_VRSHR_N(q, int, s, 32, 4, 3); ++ TEST_VRSHR_N(q, int, s, 64, 2, 3); ++ TEST_VRSHR_N(q, uint, u, 8, 16, 3); ++ TEST_VRSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VRSHR_N(q, uint, u, 32, 4, 3); ++ TEST_VRSHR_N(q, uint, u, 64, 2, 3); ++ ++#undef CMT ++#define CMT " (overflow test: shift by 3, with negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_neg_sh_3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_neg_sh_3, CMT); ++} + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++int main (void) ++{ ++ exec_vrshr_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrshrn_n.c +@@ -0,0 +1,143 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Expected results with input=0. */ ++VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; + ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, ++ 0xfa, 0xfb, 0xfb, 0xfc }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, ++ 0xfd, 0xfd, 0xfe, 0xfe }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; ++ ++/* Expected results with large shift amount. 
*/ ++VECT_VAR_DECL(expected_sh_large,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_sh_large,uint,32,2) [] = { 0x0, 0x0 }; + -+ DECL_VARIABLE(vector, int, 16, 4); -+ DECL_VARIABLE(vector, int, 32, 2); ++#define TEST_MSG "VRSHRN_N" ++void exec_vrshrn_n (void) ++{ ++ /* Basic test: v2=vrshrn_n(v1,v), then store the result. */ ++#define TEST_VRSHRN_N(T1, T2, W, N, W2, V) \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ vrshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) ++ ++ /* vector is twice as large as vector_res. */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); + ++ DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, int, 16, 8); -+ DECL_VARIABLE(vector_res, int, 32, 4); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + -+ VLOAD(vector, buffer, , int, s, 16, 4); -+ VLOAD(vector, buffer, , int, s, 32, 2); ++ /* Fill input vector with 0, to check behavior on limits. */ ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); ++ ++ /* Choose shift amount arbitrarily. */ ++ TEST_VRSHRN_N(int, s, 16, 8, 8, 1); ++ TEST_VRSHRN_N(int, s, 32, 4, 16, 1); ++ TEST_VRSHRN_N(int, s, 64, 2, 32, 2); ++ TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); ++ TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); ++ TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); ++ ++#define CMT " (with input = 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); ++ ++ ++ /* Test again, with predefined input values. */ + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); + -+ /* Choose multiplier arbitrarily. */ -+#define CMT "" -+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12, expected_cumulative_sat, CMT); -+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456, expected_cumulative_sat, CMT); ++ /* Choose shift amount arbitrarily. 
*/ ++ TEST_VRSHRN_N(int, s, 16, 8, 8, 1); ++ TEST_VRSHRN_N(int, s, 32, 4, 16, 1); ++ TEST_VRSHRN_N(int, s, 64, 2, 32, 2); ++ TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); ++ TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); ++ TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + ++#undef CMT ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); + CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); -+ -+ /* Now use input values such that the multiplication causes -+ saturation. */ -+#define TEST_MSG_MUL " (check mul cumulative saturation)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ -+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); -+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000, expected_cumulative_sat_mul, -+ TEST_MSG_MUL); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); + -+ /* Use input values where rounding produces a result equal to the -+ saturation value, but does not set the saturation flag. */ -+#define TEST_MSG_ROUND " (check rounding)" -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); ++ /* Fill input arbitrary values. */ ++ VDUP(vector, q, int, s, 16, 8, 30); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0xFFF0); ++ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFF0); ++ VDUP(vector, q, uint, u, 64, 2, 0); + -+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); -+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001, expected_cumulative_sat_round, -+ TEST_MSG_ROUND); ++ /* Choose large shift amount arbitrarily. 
*/ ++ TEST_VRSHRN_N(int, s, 16, 8, 8, 7); ++ TEST_VRSHRN_N(int, s, 32, 4, 16, 14); ++ TEST_VRSHRN_N(int, s, 64, 2, 32, 31); ++ TEST_VRSHRN_N(uint, u, 16, 8, 8, 7); ++ TEST_VRSHRN_N(uint, u, 32, 4, 16, 16); ++ TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); ++#undef CMT ++#define CMT " (with large shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh_large, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh_large, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh_large, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh_large, CMT); +} + +int main (void) +{ -+ exec_vqrdmulh_n (); ++ exec_vrshrn_n (); + return 0; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshl.c -@@ -0,0 +1,1090 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c +@@ -0,0 +1,157 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" ++#include + -+/* Expected values of cumulative_saturation flag with input=0. */ -+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++/* Expected results. */ ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x9c800000, 0x9c800000, ++ 0x9c800000, 0x9c800000 }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3e498000, 0x3e498000 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3e700000, 0x3e700000, ++ 0x3e700000, 0x3e700000 }; ++ ++/* Expected results with large uint #1. */ ++VECT_VAR_DECL(expected_1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_1,uint,32,4) [] = { 0xae800000, 0xae800000, ++ 0xae800000, 0xae800000 }; ++ ++/* Expected results with large uint #2. */ ++VECT_VAR_DECL(expected_2,uint,32,2) [] = { 0xb4800000, 0xb4800000 }; ++VECT_VAR_DECL(expected_2,uint,32,4) [] = { 0xed000000, 0xed000000, ++ 0xed000000, 0xed000000 }; ++ ++/* Expected results with FP special inputs values (NaNs, ...). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, ++ 0x7f800000, 0x7f800000 }; ++ ++/* Expected results with FP special inputs values ++ (negative, infinity). 
*/ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results with FP special inputs values ++ (-0, -infinity). */ ++VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, ++ 0x7fc00000, 0x7fc00000 }; + -+/* Expected results with input=0. */ -+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++#define TEST_MSG "VRSQRTE/VRSQRTEQ" ++void exec_vrsqrte(void) ++{ ++ int i; + -+/* Expected values of cumulative_saturation flag with input=0 and -+ negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++ /* Basic test: y=vrsqrte(x), then store the result. */ ++#define TEST_VRSQRTE(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrsqrte##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) + -+/* Expected results with input=0 and negative shift amount. 
*/ -+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++ DECL_VARIABLE(vector, uint, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, float, 32, 4); ++ ++ DECL_VARIABLE(vector_res, uint, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 32, 4); ++ DECL_VARIABLE(vector_res, float, 32, 4); ++ ++ clean_results (); ++ ++ /* Choose init value arbitrarily. */ ++ VDUP(vector, , uint, u, 32, 2, 0x12345678); ++ VDUP(vector, , float, f, 32, 2, 25.799999f); ++ VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); ++ VDUP(vector, q, float, f, 32, 4, 18.2f); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, uint, u, 32, 2); ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, uint, u, 32, 4); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#define CMT "" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); ++ ++ ++ /* Don't test FP variants with negative inputs. */ ++ /* Use input with various values of bits 30 and 31. */ ++ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector, q, uint, u, 32, 4, 0x89081234); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, uint, u, 32, 2); ++ TEST_VRSQRTE(q, uint, u, 32, 4); ++ ++#undef CMT ++#define CMT " (large uint #1)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_1, CMT); ++ ++ ++ /* Choose init value arbitrarily. */ ++ VDUP(vector, , uint, u, 32, 2, 0x80000000); ++ VDUP(vector, q, uint, u, 32, 4, 0x4ABCDEF0); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, uint, u, 32, 2); ++ TEST_VRSQRTE(q, uint, u, 32, 4); ++ ++#undef CMT ++#define CMT " (large uint #2)" ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_2, CMT); ++ ++ ++ /* Test FP variants with special input values (NaNs, ...). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ ++ /* Apply the operator. 
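
The expected_fp1/expected_fp2/expected_fp3 tables above follow the usual IEEE special-value behaviour of a reciprocal square-root estimate: NaN and negative inputs produce the default quiet NaN (0x7fc00000), +0.0 produces +Inf (0x7f800000), -0.0 produces -Inf (0xff800000), and +Inf produces +0.0. A scalar analogue (illustrative only, not part of the patch) shows the same pattern with 1.0f/sqrtf():

#include <math.h>
#include <stdio.h>

/* The same special inputs the test uses: NaN, +0, a negative value,
   +Inf, -0 and -Inf.  1/sqrt follows the IEEE rules the vector
   estimate is expected to follow.  */
int
main (void)
{
  const float in[] = { NAN, 0.0f, -1.0f, HUGE_VALF, -0.0f, -HUGE_VALF };
  for (unsigned i = 0; i < sizeof in / sizeof in[0]; i++)
    printf ("1/sqrt(%g) = %g\n", (double) in[i], (double) (1.0f / sqrtf (in[i])));
  return 0;
}
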
*/ ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (NaN, 0)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ ++ /* Test FP variants with special input values (negative, infinity). */ ++ VDUP(vector, , float, f, 32, 2, -1.0f); ++ VDUP(vector, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (negative, infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); ++ ++ /* Test FP variants with special input values (-0, -infinity). */ ++ VDUP(vector, , float, f, 32, 2, -0.0f); ++ VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTE(, float, f, 32, 2); ++ TEST_VRSQRTE(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (-0, -infinity)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); ++} + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++int main (void) ++{ ++ exec_vrsqrte (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c +@@ -0,0 +1,118 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++#include + +/* Expected results. 
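
For the vrsqrts test introduced here, the expected bit patterns can be reproduced from the Newton-Raphson step term the instruction computes, result = (3 - a*b) / 2 (assuming the standard VRSQRTS definition). With the test's 12.9f and 9.9f inputs that is roughly -62.355, i.e. 0xc2796b84, the first expected,hfloat,32,2 entry. A minimal scalar sketch, illustrative only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Scalar model of the VRSQRTS step applied to the init values used by
   the 64-bit variant of the test (12.9 and 9.9).  */
int
main (void)
{
  float a = 12.9f, b = 9.9f;
  float r = (3.0f - a * b) / 2.0f;
  uint32_t bits;

  memcpy (&bits, &r, sizeof bits);
  printf ("vrsqrts step = %f (0x%08x)\n", (double) r, bits);
  return 0;
}
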
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, -+ 0xe8, 0xea, 0xec, 0xee }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, -+ 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2796b84, 0xc2796b84 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0e4a3d8, 0xc0e4a3d8, ++ 0xc0e4a3d8, 0xc0e4a3d8 }; ++ ++/* Expected results with input=NaN. */ ++VECT_VAR_DECL(expected_nan,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; ++VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, ++ 0x7fc00000, 0x7fc00000 }; ++ ++/* Expected results with FP special inputs values (infinity, 0). */ ++VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; ++VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, ++ 0x3fc00000, 0x3fc00000 }; ++ ++/* Expected results with only FP special inputs values (infinity, ++ 0). */ ++VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x3fc00000, 0x3fc00000 }; ++VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, ++ 0x3fc00000, 0x3fc00000 }; + -+/* Expected values of cumulative_saturation flag with negative shift -+ amount. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++#define TEST_MSG "VRSQRTS/VRSQRTSQ" ++void exec_vrsqrts(void) ++{ ++ int i; + -+/* Expected results with negative shift amount. 
*/ -+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, -+ 0xfd, 0xfd, 0xfe, 0xfe }; -+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; -+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, -+ 0x3d, 0x3d, 0x3e, 0x3e }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; -+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; -+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, -+ 0x2, 0x2, 0x2, 0x2, -+ 0x2, 0x2, 0x2, 0x2, -+ 0x2, 0x2, 0x2, 0x2 }; -+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20 }; -+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x80000, 0x80000, -+ 0x80000, 0x80000 }; -+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; ++ /* Basic test: y=vrsqrts(x), then store the result. */ ++#define TEST_VRSQRTS(Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrsqrts##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)) ++ ++ /* No need for integer variants. */ ++ DECL_VARIABLE(vector, float, 32, 2); ++ DECL_VARIABLE(vector, float, 32, 4); + -+/* Expected values of cumulative_saturation flag with input=max and -+ shift by -1. */ -+int VECT_VAR(expected_cumulative_sat_minus1,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,2) = 0; ++ DECL_VARIABLE(vector2, float, 32, 2); ++ DECL_VARIABLE(vector2, float, 32, 4); + -+/* Expected results with input=max and shift by -1. 
*/ -+VECT_VAR_DECL(expected_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, -+ 0x40, 0x40, 0x40, 0x40 }; -+VECT_VAR_DECL(expected_minus1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; -+VECT_VAR_DECL(expected_minus1,int,32,2) [] = { 0x40000000, 0x40000000 }; -+VECT_VAR_DECL(expected_minus1,int,64,1) [] = { 0x4000000000000000 }; -+VECT_VAR_DECL(expected_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_minus1,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_minus1,uint,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, -+ 0x40, 0x40, 0x40, 0x40, -+ 0x40, 0x40, 0x40, 0x40, ++ DECL_VARIABLE(vector_res, float, 32, 2); ++ DECL_VARIABLE(vector_res, float, 32, 4); ++ ++ clean_results (); ++ ++ /* Choose init value arbitrarily. */ ++ VDUP(vector, , float, f, 32, 2, 12.9f); ++ VDUP(vector, q, float, f, 32, 4, 9.1f); ++ ++ VDUP(vector2, , float, f, 32, 2, 9.9f); ++ VDUP(vector2, q, float, f, 32, 4, 1.9f); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#define CMT "" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); ++ ++ ++ /* Test FP variants with special input values (NaN). */ ++ VDUP(vector, , float, f, 32, 2, NAN); ++ VDUP(vector2, q, float, f, 32, 4, NAN); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (NAN) and normal values" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_nan, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, CMT); ++ ++ ++ /* Test FP variants with special input values (infinity, 0). */ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ /* Restore a normal value in vector2. */ ++ VDUP(vector2, q, float, f, 32, 4, 3.2f); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " FP special (infinity, 0) and normal values" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); ++ ++ ++ /* Test FP variants with only special input values (infinity, 0). */ ++ VDUP(vector, , float, f, 32, 2, HUGE_VALF); ++ VDUP(vector, q, float, f, 32, 4, 0.0f); ++ VDUP(vector2, , float, f, 32, 2, -0.0f); ++ VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); ++ ++ /* Apply the operator. */ ++ TEST_VRSQRTS(, float, f, 32, 2); ++ TEST_VRSQRTS(q, float, f, 32, 4); ++ ++#undef CMT ++#define CMT " only FP special (infinity, 0)" ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); ++} ++ ++int main (void) ++{ ++ exec_vrsqrts (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsra_n.c +@@ -0,0 +1,553 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" ++ ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf9, 0xfa, 0xfb, 0xfc, ++ 0xfd, 0xfe, 0xff, 0x0 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffd, 0xfffffffe }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffd, 0xfffe, 0xffff, 0x0 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xfa, 0xfb, 0xfc, ++ 0xfd, 0xfe, 0xff, 0x0, ++ 0x1, 0x2, 0x3, 0x4, ++ 0x5, 0x6, 0x7, 0x8 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffe, ++ 0xffffffff, 0x0 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc, ++ 0xd, 0xe, 0xf, 0x10, ++ 0x11, 0x12, 0x13, 0x14 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, ++ 0x1, 0x2, 0x3, 0x4 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, ++ 0xfffffff6, 0xfffffff7 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++ ++/* Expected results with max input and shift by 1. */ ++VECT_VAR_DECL(expected_max_sh1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40 }; -+VECT_VAR_DECL(expected_minus1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, -+ 0x4000, 0x4000, 0x4000, 0x4000 }; -+VECT_VAR_DECL(expected_minus1,int,32,4) [] = { 0x40000000, 0x40000000, -+ 0x40000000, 0x40000000 }; -+VECT_VAR_DECL(expected_minus1,int,64,2) [] = { 0x4000000000000000, -+ 0x4000000000000000 }; -+VECT_VAR_DECL(expected_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, ++VECT_VAR_DECL(expected_max_sh1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh1,int,32,2) [] = { 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh1,int,64,1) [] = { 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_minus1,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, -+ 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_minus1,uint,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_minus1,uint,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+ -+/* Expected values of cumulative_saturation flag with input=max and -+ shift by -3. 
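
These vrsra_n expectations follow from the rounding rule of the instruction: each addend lane is shifted right after adding a rounding constant of 1 << (shift - 1), then accumulated into the first operand. For the first int8x8 lane the accumulator loaded from buffer starts at 0xf0 (-16), the addend is 0x11 (17) and the shift count is 1, so -16 + ((17 + 1) >> 1) = -7 = 0xf9, the first value in expected. A scalar sketch (the helper name is made up for illustration, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Scalar model of vrsra_n: accumulate a rounded right shift.  */
static int8_t
rsra_n_s8 (int8_t acc, int8_t x, int n)
{
  return (int8_t) (acc + ((x + (1 << (n - 1))) >> n));
}

int
main (void)
{
  printf ("0x%02x\n", (uint8_t) rsra_n_s8 (-16, 0x11, 1));   /* prints 0xf9 */
  return 0;
}
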
*/ -+int VECT_VAR(expected_cumulative_sat_minus3,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,2) = 0; ++VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x8000000000000000 }; ++VECT_VAR_DECL(expected_max_sh1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40, ++ 0x40, 0x40, 0x40, 0x40 }; ++VECT_VAR_DECL(expected_max_sh1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, ++ 0x4000, 0x4000, 0x4000, 0x4000 }; ++VECT_VAR_DECL(expected_max_sh1,int,32,4) [] = { 0x40000000, 0x40000000, ++ 0x40000000, 0x40000000 }; ++VECT_VAR_DECL(expected_max_sh1,int,64,2) [] = { 0x4000000000000000, ++ 0x4000000000000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80, ++ 0x80, 0x80, 0x80, 0x80 }; ++VECT_VAR_DECL(expected_max_sh1,uint,16,8) [] = { 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000, ++ 0x8000, 0x8000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,32,4) [] = { 0x80000000, 0x80000000, ++ 0x80000000, 0x80000000 }; ++VECT_VAR_DECL(expected_max_sh1,uint,64,2) [] = { 0x8000000000000000, ++ 0x8000000000000000 }; + -+/* Expected results with input=max and shift by -3. */ -+VECT_VAR_DECL(expected_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, -+ 0x10, 0x10, 0x10, 0x10 }; -+VECT_VAR_DECL(expected_minus3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; -+VECT_VAR_DECL(expected_minus3,int,32,2) [] = { 0x10000000, 0x10000000 }; -+VECT_VAR_DECL(expected_minus3,int,64,1) [] = { 0x1000000000000000 }; -+VECT_VAR_DECL(expected_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20 }; -+VECT_VAR_DECL(expected_minus3,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; -+VECT_VAR_DECL(expected_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 }; -+VECT_VAR_DECL(expected_minus3,uint,64,1) [] = { 0x2000000000000000 }; -+VECT_VAR_DECL(expected_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, -+ 0x10, 0x10, 0x10, 0x10, -+ 0x10, 0x10, 0x10, 0x10, ++/* Expected results with max input and shift by 3. 
*/ ++VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10 }; -+VECT_VAR_DECL(expected_minus3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, -+ 0x1000, 0x1000, 0x1000, 0x1000 }; -+VECT_VAR_DECL(expected_minus3,int,32,4) [] = { 0x10000000, 0x10000000, -+ 0x10000000, 0x10000000 }; -+VECT_VAR_DECL(expected_minus3,int,64,2) [] = { 0x1000000000000000, -+ 0x1000000000000000 }; -+VECT_VAR_DECL(expected_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20, -+ 0x20, 0x20, 0x20, 0x20, ++VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh3,int,64,1) [] = { 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20 }; -+VECT_VAR_DECL(expected_minus3,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, -+ 0x2000, 0x2000, 0x2000, 0x2000 }; -+VECT_VAR_DECL(expected_minus3,uint,32,4) [] = { 0x20000000, 0x20000000, -+ 0x20000000, 0x20000000 }; -+VECT_VAR_DECL(expected_minus3,uint,64,2) [] = { 0x2000000000000000, -+ 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,64,1) [] = { 0x2000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10, ++ 0x10, 0x10, 0x10, 0x10 }; ++VECT_VAR_DECL(expected_max_sh3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, ++ 0x1000, 0x1000, 0x1000, 0x1000 }; ++VECT_VAR_DECL(expected_max_sh3,int,32,4) [] = { 0x10000000, 0x10000000, ++ 0x10000000, 0x10000000 }; ++VECT_VAR_DECL(expected_max_sh3,int,64,2) [] = { 0x1000000000000000, ++ 0x1000000000000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20 }; ++VECT_VAR_DECL(expected_max_sh3,uint,16,8) [] = { 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000, ++ 0x2000, 0x2000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,32,4) [] = { 0x20000000, 0x20000000, ++ 0x20000000, 0x20000000 }; ++VECT_VAR_DECL(expected_max_sh3,uint,64,2) [] = { 0x2000000000000000, ++ 0x2000000000000000 }; ++ ++/* Expected results with max input and shift by type width. 
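
When the shift count equals the element width, only the rounding increment can survive the shift, so the expected_max_shmax tables come out as 0x0 for the signed maxima (0x7f, 0x7fff, ...) and 0x1 for the unsigned maxima (0xff, 0xffff, ...). Illustrative arithmetic for the 8-bit case (not part of the patch):

#include <stdio.h>

/* Rounded right shift by the full element width: signed max 0x7f gives
   (127 + 128) >> 8 == 0, unsigned max 0xff gives (255 + 128) >> 8 == 1.  */
int
main (void)
{
  int n = 8;
  printf ("%d %d\n", (0x7f + (1 << (n - 1))) >> n, (0xff + (1 << (n - 1))) >> n);
  return 0;
}
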
*/ ++VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_max_shmax,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_max_shmax,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with min negative input and shift by 1. */ ++VECT_VAR_DECL(expected_min_sh1,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_min_sh1,int,16,4) [] = { 0xc000, 0xc000, 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_min_sh1,int,32,2) [] = { 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_min_sh1,int,64,1) [] = { 0xc000000000000000 }; ++VECT_VAR_DECL(expected_min_sh1,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0, ++ 0xc0, 0xc0, 0xc0, 0xc0 }; ++VECT_VAR_DECL(expected_min_sh1,int,16,8) [] = { 0xc000, 0xc000, 0xc000, 0xc000, ++ 0xc000, 0xc000, 0xc000, 0xc000 }; ++VECT_VAR_DECL(expected_min_sh1,int,32,4) [] = { 0xc0000000, 0xc0000000, ++ 0xc0000000, 0xc0000000 }; ++VECT_VAR_DECL(expected_min_sh1,int,64,2) [] = { 0xc000000000000000, ++ 0xc000000000000000 }; ++VECT_VAR_DECL(expected_min_sh1,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh1,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with min negative input and shift by 3. 
*/ ++VECT_VAR_DECL(expected_min_sh3,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_min_sh3,int,16,4) [] = { 0xf000, 0xf000, 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_min_sh3,int,32,2) [] = { 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_min_sh3,int,64,1) [] = { 0xf000000000000000 }; ++VECT_VAR_DECL(expected_min_sh3,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0, ++ 0xf0, 0xf0, 0xf0, 0xf0 }; ++VECT_VAR_DECL(expected_min_sh3,int,16,8) [] = { 0xf000, 0xf000, 0xf000, 0xf000, ++ 0xf000, 0xf000, 0xf000, 0xf000 }; ++VECT_VAR_DECL(expected_min_sh3,int,32,4) [] = { 0xf0000000, 0xf0000000, ++ 0xf0000000, 0xf0000000 }; ++VECT_VAR_DECL(expected_min_sh3,int,64,2) [] = { 0xf000000000000000, ++ 0xf000000000000000 }; ++VECT_VAR_DECL(expected_min_sh3,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_sh3,uint,64,2) [] = { 0x1, 0x1 }; ++ ++/* Expected results with min negative input and shift by type width. */ ++VECT_VAR_DECL(expected_min_shmax,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,64,1) [] = { 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,64,1) [] = { 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,int,64,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_min_shmax,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, ++ 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; ++VECT_VAR_DECL(expected_min_shmax,uint,64,2) [] = { 0x1, 0x1 }; ++ ++#define TEST_MSG "VRSRA_N" ++void exec_vrsra_n (void) ++{ ++ /* Basic test: y=vrsra_n(x,v), then store the result. */ ++#define TEST_VRSRA_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vrsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) ++ ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector2); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ ++ clean_results (); ++ ++ /* Initialize input "vector" from "buffer". 
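
For reference, one TEST_VRSRA_N instantiation boils down to a plain intrinsic call like the sketch below (illustrative only; the function name is made up and this is not part of the patch):

#include <arm_neon.h>

/* Roughly what TEST_VRSRA_N(, int, s, 8, 8, 1) expands to: load the
   accumulator and the addend, do the rounded shift-and-accumulate,
   store the result.  */
void
rsra_example (int8_t *out, const int8_t *acc, const int8_t *addend)
{
  int8x8_t a = vld1_s8 (acc);
  int8x8_t b = vld1_s8 (addend);
  int8x8_t r = vrsra_n_s8 (a, b, 1);   /* per lane: a + rounded (b >> 1) */
  vst1_s8 (out, r);
}
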
*/ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose arbitrary initialization values. */ ++ VDUP(vector2, , int, s, 8, 8, 0x11); ++ VDUP(vector2, , int, s, 16, 4, 0x22); ++ VDUP(vector2, , int, s, 32, 2, 0x33); ++ VDUP(vector2, , int, s, 64, 1, 0x44); ++ VDUP(vector2, , uint, u, 8, 8, 0x55); ++ VDUP(vector2, , uint, u, 16, 4, 0x66); ++ VDUP(vector2, , uint, u, 32, 2, 0x77); ++ VDUP(vector2, , uint, u, 64, 1, 0x88); ++ ++ VDUP(vector2, q, int, s, 8, 16, 0x11); ++ VDUP(vector2, q, int, s, 16, 8, 0x22); ++ VDUP(vector2, q, int, s, 32, 4, 0x33); ++ VDUP(vector2, q, int, s, 64, 2, 0x44); ++ VDUP(vector2, q, uint, u, 8, 16, 0x55); ++ VDUP(vector2, q, uint, u, 16, 8, 0x66); ++ VDUP(vector2, q, uint, u, 32, 4, 0x77); ++ VDUP(vector2, q, uint, u, 64, 2, 0x88); ++ ++ /* Choose shift amount arbitrarily. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 1); ++ TEST_VRSRA_N(, int, s, 16, 4, 12); ++ TEST_VRSRA_N(, int, s, 32, 2, 2); ++ TEST_VRSRA_N(, int, s, 64, 1, 32); ++ TEST_VRSRA_N(, uint, u, 8, 8, 2); ++ TEST_VRSRA_N(, uint, u, 16, 4, 3); ++ TEST_VRSRA_N(, uint, u, 32, 2, 5); ++ TEST_VRSRA_N(, uint, u, 64, 1, 33); ++ ++ TEST_VRSRA_N(q, int, s, 8, 16, 1); ++ TEST_VRSRA_N(q, int, s, 16, 8, 12); ++ TEST_VRSRA_N(q, int, s, 32, 4, 2); ++ TEST_VRSRA_N(q, int, s, 64, 2, 32); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 2); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 3); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 5); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 33); ++ ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); + -+/* Expected values of cumulative_saturation flag with input=max and -+ large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,2) = 1; + -+/* Expected results with input=max and large shift amount. 
*/ -+VECT_VAR_DECL(expected_large_sh,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_large_sh,int,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_large_sh,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_large_sh,int,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_large_sh,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_large_sh,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_large_sh,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_large_sh,int,16,8) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_large_sh,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_large_sh,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_large_sh,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_large_sh,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_large_sh,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ /* Initialize the accumulator with 0. */ ++ VDUP(vector, , int, s, 8, 8, 0); ++ VDUP(vector, , int, s, 16, 4, 0); ++ VDUP(vector, , int, s, 32, 2, 0); ++ VDUP(vector, , int, s, 64, 1, 0); ++ VDUP(vector, , uint, u, 8, 8, 0); ++ VDUP(vector, , uint, u, 16, 4, 0); ++ VDUP(vector, , uint, u, 32, 2, 0); ++ VDUP(vector, , uint, u, 64, 1, 0); ++ VDUP(vector, q, int, s, 8, 16, 0); ++ VDUP(vector, q, int, s, 16, 8, 0); ++ VDUP(vector, q, int, s, 32, 4, 0); ++ VDUP(vector, q, int, s, 64, 2, 0); ++ VDUP(vector, q, uint, u, 8, 16, 0); ++ VDUP(vector, q, uint, u, 16, 8, 0); ++ VDUP(vector, q, uint, u, 32, 4, 0); ++ VDUP(vector, q, uint, u, 64, 2, 0); + -+/* Expected values of cumulative_saturation flag with negative input and -+ large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,2) = 1; ++ /* Initialize with max values to check overflow. 
*/ ++ VDUP(vector2, , int, s, 8, 8, 0x7F); ++ VDUP(vector2, , int, s, 16, 4, 0x7FFF); ++ VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); ++ VDUP(vector2, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector2, , uint, u, 8, 8, 0xFF); ++ VDUP(vector2, , uint, u, 16, 4, 0xFFFF); ++ VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFFF); ++ VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); ++ VDUP(vector2, q, int, s, 8, 16, 0x7F); ++ VDUP(vector2, q, int, s, 16, 8, 0x7FFF); ++ VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); ++ VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++ VDUP(vector2, q, uint, u, 8, 16, 0xFF); ++ VDUP(vector2, q, uint, u, 16, 8, 0xFFFF); ++ VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF); ++ VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ ++ /* Shift by 1 to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 1); ++ TEST_VRSRA_N(, int, s, 16, 4, 1); ++ TEST_VRSRA_N(, int, s, 32, 2, 1); ++ TEST_VRSRA_N(, int, s, 64, 1, 1); ++ TEST_VRSRA_N(, uint, u, 8, 8, 1); ++ TEST_VRSRA_N(, uint, u, 16, 4, 1); ++ TEST_VRSRA_N(, uint, u, 32, 2, 1); ++ TEST_VRSRA_N(, uint, u, 64, 1, 1); ++ TEST_VRSRA_N(q, int, s, 8, 16, 1); ++ TEST_VRSRA_N(q, int, s, 16, 8, 1); ++ TEST_VRSRA_N(q, int, s, 32, 4, 1); ++ TEST_VRSRA_N(q, int, s, 64, 2, 1); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 1); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 1); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 1); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 1); + -+/* Expected results with negative input and large shift amount. */ -+VECT_VAR_DECL(expected_neg_large_sh,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,16,4) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,32,2) [] = { 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,32,2) [] = { 0xffffffff, -+ 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg_large_sh,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,16,8) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,int,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,32,4) [] = { 0xffffffff, -+ 0xffffffff, -+ 0xffffffff, -+ 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large_sh,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++#undef CMT ++#define CMT " (checking overflow: shift by 1, max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, 
expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh1, CMT); ++ ++ ++ /* Shift by 3 to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 3); ++ TEST_VRSRA_N(, int, s, 16, 4, 3); ++ TEST_VRSRA_N(, int, s, 32, 2, 3); ++ TEST_VRSRA_N(, int, s, 64, 1, 3); ++ TEST_VRSRA_N(, uint, u, 8, 8, 3); ++ TEST_VRSRA_N(, uint, u, 16, 4, 3); ++ TEST_VRSRA_N(, uint, u, 32, 2, 3); ++ TEST_VRSRA_N(, uint, u, 64, 1, 3); ++ TEST_VRSRA_N(q, int, s, 8, 16, 3); ++ TEST_VRSRA_N(q, int, s, 16, 8, 3); ++ TEST_VRSRA_N(q, int, s, 32, 4, 3); ++ TEST_VRSRA_N(q, int, s, 64, 2, 3); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 3); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 3); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 3); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 3); + -+/* Expected values of cumulative_saturation flag with max/min input and -+ large negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,2) = 0; ++#undef CMT ++#define CMT " (checking overflow: shift by 3, max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh3, CMT); ++ 
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_sh3, CMT); ++ ++ ++ /* Shift by max to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 8); ++ TEST_VRSRA_N(, int, s, 16, 4, 16); ++ TEST_VRSRA_N(, int, s, 32, 2, 32); ++ TEST_VRSRA_N(, int, s, 64, 1, 64); ++ TEST_VRSRA_N(, uint, u, 8, 8, 8); ++ TEST_VRSRA_N(, uint, u, 16, 4, 16); ++ TEST_VRSRA_N(, uint, u, 32, 2, 32); ++ TEST_VRSRA_N(, uint, u, 64, 1, 64); ++ TEST_VRSRA_N(q, int, s, 8, 16, 8); ++ TEST_VRSRA_N(q, int, s, 16, 8, 16); ++ TEST_VRSRA_N(q, int, s, 32, 4, 32); ++ TEST_VRSRA_N(q, int, s, 64, 2, 64); ++ TEST_VRSRA_N(q, uint, u, 8, 16, 8); ++ TEST_VRSRA_N(q, uint, u, 16, 8, 16); ++ TEST_VRSRA_N(q, uint, u, 32, 4, 32); ++ TEST_VRSRA_N(q, uint, u, 64, 2, 64); + -+/* Expected results with max/min input and large negative shift amount. */ -+VECT_VAR_DECL(expected_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++#undef CMT ++#define CMT " (checking overflow: shift by max, max input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_shmax, CMT); + -+/* Expected values of cumulative_saturation flag with input=0 and -+ large 
negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,2) = 0; + -+/* Expected results with input=0 and large negative shift amount. */ -+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 }; ++ /* Initialize with min values to check overflow. */ ++ VDUP(vector2, , int, s, 8, 8, 0x80); ++ VDUP(vector2, , int, s, 16, 4, 0x8000); ++ VDUP(vector2, , int, s, 32, 2, 0x80000000); ++ VDUP(vector2, , int, s, 64, 1, 0x8000000000000000LL); ++ VDUP(vector2, q, int, s, 8, 16, 0x80); ++ VDUP(vector2, q, int, s, 16, 8, 0x8000); ++ VDUP(vector2, q, int, s, 32, 4, 0x80000000); ++ VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000ULL); + -+#define INSN vqrshl -+#define TEST_MSG "VQRSHL/VQRSHLQ" ++ /* Shift by 1 to check overflow with rounding constant. 
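
For the most negative signed input 0x80 (-128) the same rounding rule gives 0xc0 after a shift of 1, 0xf0 after a shift of 3 and 0x0 after a shift by the full element width, which is the pattern in the expected_min_sh1/expected_min_sh3/expected_min_shmax tables (assuming the usual arithmetic right shift for signed values). Illustrative arithmetic, not part of the patch:

#include <stdio.h>

/* Rounded arithmetic shifts of -128; ">>" on a negative value is
   assumed to behave as an arithmetic shift here, as on GCC targets.  */
int
main (void)
{
  const int shifts[] = { 1, 3, 8 };
  int x = -128;

  for (unsigned i = 0; i < 3; i++)
    {
      int n = shifts[i];
      printf ("n=%d -> 0x%02x\n", n, (unsigned char) ((x + (1 << (n - 1))) >> n));
    }
  return 0;
}
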
*/ ++ TEST_VRSRA_N(, int, s, 8, 8, 1); ++ TEST_VRSRA_N(, int, s, 16, 4, 1); ++ TEST_VRSRA_N(, int, s, 32, 2, 1); ++ TEST_VRSRA_N(, int, s, 64, 1, 1); ++ TEST_VRSRA_N(q, int, s, 8, 16, 1); ++ TEST_VRSRA_N(q, int, s, 16, 8, 1); ++ TEST_VRSRA_N(q, int, s, 32, 4, 1); ++ TEST_VRSRA_N(q, int, s, 64, 2, 1); + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++#undef CMT ++#define CMT " (checking overflow: shift by 1, min negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_sh1, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_min_sh1, CMT); ++ ++ ++ /* Shift by 3 to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 3); ++ TEST_VRSRA_N(, int, s, 16, 4, 3); ++ TEST_VRSRA_N(, int, s, 32, 2, 3); ++ TEST_VRSRA_N(, int, s, 64, 1, 3); ++ TEST_VRSRA_N(q, int, s, 8, 16, 3); ++ TEST_VRSRA_N(q, int, s, 16, 8, 3); ++ TEST_VRSRA_N(q, int, s, 32, 4, 3); ++ TEST_VRSRA_N(q, int, s, 64, 2, 3); ++ ++#undef CMT ++#define CMT " (checking overflow: shift by 3, min negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_sh3, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_min_sh3, CMT); ++ ++ ++ /* Shift by max to check overflow with rounding constant. */ ++ TEST_VRSRA_N(, int, s, 8, 8, 8); ++ TEST_VRSRA_N(, int, s, 16, 4, 16); ++ TEST_VRSRA_N(, int, s, 32, 2, 32); ++ TEST_VRSRA_N(, int, s, 64, 1, 64); ++ TEST_VRSRA_N(q, int, s, 8, 16, 8); ++ TEST_VRSRA_N(q, int, s, 16, 8, 16); ++ TEST_VRSRA_N(q, int, s, 32, 4, 32); ++ TEST_VRSRA_N(q, int, s, 64, 2, 64); + -+FNNAME (INSN) -+{ -+ /* Basic test: v3=vqrshl(v1,v2), then store the result. 
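
The vqrshl hunks in this part of the diff exercise the saturating, rounding shift left whose per-lane shift count is taken as a signed value from the second operand; negative counts shift right with rounding, and any saturation sets the cumulative QC flag that the Set_Neon_Cumulative_Sat/CHECK_CUMULATIVE_SAT pair reads back. A minimal sketch of the underlying intrinsic (illustrative only, not part of the patch):

#include <arm_neon.h>

/* Saturating rounding shift left: each lane of value is shifted by the
   corresponding signed lane of shift_count (right, with rounding, when
   the count is negative), saturating on overflow.  */
int8x8_t
qrshl_example (int8x8_t value, int8x8_t shift_count)
{
  return vqrshl_s8 (value, shift_count);
}
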
*/ -+#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector_shift, T3, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#undef CMT ++#define CMT " (checking overflow: shift by max, min negative input)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_shmax, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_min_shmax, CMT); ++} + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++int main (void) ++{ ++ exec_vrsra_n (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc +@@ -68,7 +68,24 @@ void FNNAME (INSN_NAME) (void) + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 3); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 12); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); + + #ifdef EXTRA_TESTS + EXTRA_TESTS(); +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c +@@ -0,0 +1,99 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+#define TEST_VQRSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Expected results. 
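
vset_lane/vset_laneq replace a single lane of a vector and leave the other lanes untouched, so the expected tables for the new vset_lane test are simply the reference buffer contents (0xf0, 0xf1, ...) with one lane overwritten by the constant passed to TEST_VSET_LANE; for example lane 7 of the int8x8 vector becomes 0x11. A minimal sketch of one variant (illustrative only, not part of the patch):

#include <arm_neon.h>

/* What one TEST_VSET_LANE instantiation does for the int8x8 case:
   overwrite lane 7 with 0x11 and store the vector back.  */
void
set_lane_example (int8_t *inout)
{
  int8x8_t v = vld1_s8 (inout);

  v = vset_lane_s8 (0x11, v, 7);
  vst1_s8 (inout, v);
}
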
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0x11 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x22 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0x44 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0x55, 0xf7 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x88 }; ++VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0x55, 0xf7 }; ++VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x4204cccd }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0x99 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xaa, 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xbb }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xcc }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xdd, 0xff }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xee, 0xfff7 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xff, 0xfffffff3 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x11 }; ++VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xdd, 0xff }; ++VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xee, 0xfff7 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0x41333333 }; ++ ++#define TEST_MSG "VSET_LANE/VSET_LANEQ" ++void exec_vset_lane (void) ++{ ++ /* vec=vset_lane(val, vec, lane), then store the result. */ ++#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L) \ ++ VECT_VAR(vector, T1, W, N) = \ ++ vset##Q##_lane_##T2##W(V, \ ++ VECT_VAR(vector, T1, W, N), \ ++ L); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); -+ -+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + -+ /* Fill input vector with 0, to check saturation on limits. */ -+ VDUP(vector, , int, s, 8, 8, 0); -+ VDUP(vector, , int, s, 16, 4, 0); -+ VDUP(vector, , int, s, 32, 2, 0); -+ VDUP(vector, , int, s, 64, 1, 0); -+ VDUP(vector, , uint, u, 8, 8, 0); -+ VDUP(vector, , uint, u, 16, 4, 0); -+ VDUP(vector, , uint, u, 32, 2, 0); -+ VDUP(vector, , uint, u, 64, 1, 0); -+ VDUP(vector, q, int, s, 8, 16, 0); -+ VDUP(vector, q, int, s, 16, 8, 0); -+ VDUP(vector, q, int, s, 32, 4, 0); -+ VDUP(vector, q, int, s, 64, 2, 0); -+ VDUP(vector, q, uint, u, 8, 16, 0); -+ VDUP(vector, q, uint, u, 16, 8, 0); -+ VDUP(vector, q, uint, u, 32, 4, 0); -+ VDUP(vector, q, uint, u, 64, 2, 0); -+ -+ /* Choose init value arbitrarily, will be used as shift amount */ -+ /* Use values equal to or one-less-than the type width to check -+ behaviour on limits. 
*/ -+ VDUP(vector_shift, , int, s, 8, 8, 7); -+ VDUP(vector_shift, , int, s, 16, 4, 15); -+ VDUP(vector_shift, , int, s, 32, 2, 31); -+ VDUP(vector_shift, , int, s, 64, 1, 63); -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ VLOAD(vector, buffer, , float, f, 32, 2); ++ VLOAD(vector, buffer, q, float, f, 32, 4); + -+#define CMT " (with input = 0)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++ /* Choose value and lane arbitrarily. */ ++ TEST_VSET_LANE(, int, s, 8, 8, 0x11, 7); ++ TEST_VSET_LANE(, int, s, 16, 4, 0x22, 3); ++ TEST_VSET_LANE(, int, s, 32, 2, 0x33, 1); ++ TEST_VSET_LANE(, int, s, 64, 1, 0x44, 0); ++ TEST_VSET_LANE(, uint, u, 8, 8, 0x55, 6); ++ TEST_VSET_LANE(, uint, u, 16, 4, 0x66, 2); ++ TEST_VSET_LANE(, uint, u, 32, 2, 0x77, 1); ++ TEST_VSET_LANE(, uint, u, 64, 1, 0x88, 0); ++ TEST_VSET_LANE(, poly, p, 8, 8, 0x55, 6); ++ TEST_VSET_LANE(, poly, p, 16, 4, 0x66, 2); ++ TEST_VSET_LANE(, float, f, 32, 2, 33.2f, 1); ++ ++ TEST_VSET_LANE(q, int, s, 8, 16, 0x99, 15); ++ TEST_VSET_LANE(q, int, s, 16, 8, 0xAA, 5); ++ TEST_VSET_LANE(q, int, s, 32, 4, 0xBB, 3); ++ TEST_VSET_LANE(q, int, s, 64, 2, 0xCC, 1); ++ TEST_VSET_LANE(q, uint, u, 8, 16, 0xDD, 14); ++ TEST_VSET_LANE(q, uint, u, 16, 8, 0xEE, 6); ++ TEST_VSET_LANE(q, uint, u, 32, 4, 0xFF, 2); ++ TEST_VSET_LANE(q, uint, u, 64, 2, 0x11, 1); ++ TEST_VSET_LANE(q, poly, p, 8, 16, 0xDD, 14); ++ TEST_VSET_LANE(q, poly, p, 16, 8, 0xEE, 6); ++ TEST_VSET_LANE(q, float, f, 32, 4, 11.2f, 3); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 
16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++ CHECK_RESULTS(TEST_MSG, ""); ++} + ++int main (void) ++{ ++ exec_vset_lane (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c +@@ -13,11 +13,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60, + 0x80, 0xa0, 0xc0, 0xe0, + 0x0, 0x20, 0x40, 0x60, +@@ -36,14 +31,6 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, + VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000, + 0x80000000, 0xc0000000 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with large shift amount. */ + VECT_VAR_DECL(expected_large_shift,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, +@@ -56,11 +43,6 @@ VECT_VAR_DECL(expected_large_shift,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + VECT_VAR_DECL(expected_large_shift,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,32,2) [] = { 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,64,1) [] = { 0x0 }; +-VECT_VAR_DECL(expected_large_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_large_shift,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_large_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_large_shift,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +@@ -77,16 +59,6 @@ VECT_VAR_DECL(expected_large_shift,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + VECT_VAR_DECL(expected_large_shift,uint,64,2) [] = { 0x0, 0x0 }; +-VECT_VAR_DECL(expected_large_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_large_shift,poly,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_large_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + + /* Expected results with negative shift amount. 
*/ +@@ -103,12 +75,6 @@ VECT_VAR_DECL(expected_negative_shift,uint,16,4) [] = { 0x7ff8, 0x7ff8, + VECT_VAR_DECL(expected_negative_shift,uint,32,2) [] = { 0x3ffffffc, + 0x3ffffffc }; + VECT_VAR_DECL(expected_negative_shift,uint,64,1) [] = { 0xfffffffffffffff }; +-VECT_VAR_DECL(expected_negative_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_negative_shift,poly,16,4) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_negative_shift,hfloat,32,2) [] = { 0x33333333, +- 0x33333333 }; + VECT_VAR_DECL(expected_negative_shift,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc, + 0xfd, 0xfd, 0xfd, 0xfd, + 0xfe, 0xfe, 0xfe, 0xfe, +@@ -133,18 +99,6 @@ VECT_VAR_DECL(expected_negative_shift,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe, + 0x1ffffffe, 0x1ffffffe }; + VECT_VAR_DECL(expected_negative_shift,uint,64,2) [] = { 0x7ffffffffffffff, + 0x7ffffffffffffff }; +-VECT_VAR_DECL(expected_negative_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected_negative_shift,poly,16,8) [] = { 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333, +- 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected_negative_shift,hfloat,32,4) [] = { 0x33333333, +- 0x33333333, +- 0x33333333, +- 0x33333333 }; + + + #ifndef INSN_NAME +@@ -187,7 +141,22 @@ void FNNAME (INSN_NAME) (void) + /* Execute the tests. */ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + +- CHECK_RESULTS (TEST_MSG, ""); ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); + + + /* Test large shift amount (larger or equal to the type width. */ +@@ -203,7 +172,23 @@ void FNNAME (INSN_NAME) (void) + /* Execute the tests. 
*/ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_large_shift, "(large shift amount)"); ++#define COMMENT1 "(large shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_shift, COMMENT1); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_shift, COMMENT1); + + + /* Test negative shift amount. */ +@@ -219,7 +204,23 @@ void FNNAME (INSN_NAME) (void) + /* Execute the tests. */ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_negative_shift, "(negative shift amount)"); ++#define COMMENT2 "(negative shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative_shift, COMMENT2); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_negative_shift, COMMENT2); + } + + int main (void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl_n.c +@@ -0,0 +1,96 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Use negative shift amounts. */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, ++ 0xe8, 0xea, 0xec, 0xee }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffff80, 0xffffff88 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff00, 0xff10, 0xff20, 0xff30 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff80, 0xffffff88 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffe0 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60, ++ 0x80, 0xa0, 0xc0, 0xe0, ++ 0x0, 0x20, 0x40, 0x60, ++ 0x80, 0xa0, 0xc0, 0xe0 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, ++ 0xffe8, 0xffea, 0xffec, 0xffee }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffc0, 0xffffffc4, ++ 0xffffffc8, 0xffffffcc }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, ++ 0xd0, 0xd4, 0xd8, 0xdc, ++ 0xe0, 0xe4, 0xe8, 0xec, ++ 0xf0, 0xf4, 0xf8, 0xfc }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff80, 0xff88, 0xff90, 0xff98, ++ 0xffa0, 0xffa8, 0xffb0, 0xffb8 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc4, ++ 0xffffffc8, 0xffffffcc }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffe0, ++ 0xffffffffffffffe2 }; + -+#undef CMT -+#define CMT " (input 0 and negative shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++#define TEST_MSG "VSHL_N" ++void exec_vshl_n (void) ++{ ++ /* Basic test: v2=vshl_n(v1,v), then store the result. 
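An illustrative sketch (the helper name is invented for this note): vshl_n
shifts every lane left by a compile-time immediate, so with the 0xfff0-style
inputs these tests load from "buffer", 0xfff0 << 1 gives the 0xffe0 that
opens expected,int,16,4 above.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: shift each 16-bit lane
     left by one, as TEST_VSHL_N(, int, s, 16, 4, 1) does below.  */
  int16x4_t
  shift_left_by_one (int16x4_t v)
  {
    return vshl_n_s16 (v, 1);   /* shift amount must be a constant */
  }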
*/ ++#define TEST_VSHL_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vshl##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ clean_results (); + -+ /* Test again, with predefined input values. */ ++ /* Initialize input "vector" from "buffer". */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+ /* Choose init value arbitrarily, will be used as shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 1); -+ VDUP(vector_shift, , int, s, 16, 4, 3); -+ VDUP(vector_shift, , int, s, 32, 2, 8); -+ VDUP(vector_shift, , int, s, 64, 1, 3); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 12); -+ VDUP(vector_shift, q, int, s, 32, 4, 31); -+ VDUP(vector_shift, q, int, s, 64, 2, 63); ++ /* Choose shift amount arbitrarily. 
*/ ++ TEST_VSHL_N(, int, s, 8, 8, 1); ++ TEST_VSHL_N(, int, s, 16, 4, 1); ++ TEST_VSHL_N(, int, s, 32, 2, 3); ++ TEST_VSHL_N(, int, s, 64, 1, 2); ++ TEST_VSHL_N(, uint, u, 8, 8, 2); ++ TEST_VSHL_N(, uint, u, 16, 4, 4); ++ TEST_VSHL_N(, uint, u, 32, 2, 3); ++ TEST_VSHL_N(, uint, u, 64, 1, 1); ++ ++ TEST_VSHL_N(q, int, s, 8, 16, 5); ++ TEST_VSHL_N(q, int, s, 16, 8, 1); ++ TEST_VSHL_N(q, int, s, 32, 4, 2); ++ TEST_VSHL_N(q, int, s, 64, 2, 2); ++ TEST_VSHL_N(q, uint, u, 8, 16, 2); ++ TEST_VSHL_N(q, uint, u, 16, 8, 3); ++ TEST_VSHL_N(q, uint, u, 32, 4, 2); ++ TEST_VSHL_N(q, uint, u, 64, 2, 1); + -+#undef CMT +#define CMT "" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); -+ + CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); + CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); @@ -8848,2416 +27073,3619 @@ + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + ++int main (void) ++{ ++ exec_vshl_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshll_n.c +@@ -0,0 +1,56 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Use negative shift amounts. */ -+ VDUP(vector_shift, , int, s, 8, 8, -2); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++/* Expected results. 
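An illustrative sketch (helper invented for this note): vshll_n widens each
lane to twice its width before shifting, which is why the expected arrays
below use the double-width q-register types.  Assuming the usual 0xf0-based
inputs, the first int8 lane is 0xf0 (-16), which widens to 0xfff0 and shifts
to 0xffe0, the first value in expected,int,16,8.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: sign-extend each int8 lane
     to 16 bits and shift left by one, as TEST_VSHLL_N(int, s, 8, 16, 8, 1)
     does below.  */
  int16x8_t
  widen_and_shift (int8x8_t v)
  {
    return vshll_n_s8 (v, 1);
  }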
*/ ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, ++ 0xffe8, 0xffea, 0xffec, 0xffee }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, ++ 0xffffffe4, 0xffffffe6 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffff80, 0xffffffffffffff88 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3c0, 0x3c4, 0x3c8, 0x3cc, ++ 0x3d0, 0x3d4, 0x3d8, 0x3dc }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff00, 0xfff10, 0xfff20, 0xfff30 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffff80, 0x7ffffff88 }; + -+#undef CMT -+#define CMT " (negative shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++#define TEST_MSG "VSHLL_N" ++void exec_vshll_n (void) ++{ ++ /* Basic test: v2=vshll_n(v1,v), then store the result. */ ++#define TEST_VSHLL_N(T1, T2, W, W2, N, V) \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ vshll##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1q##_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ clean_results (); + -+ /* Fill input vector with max value, to check saturation on -+ limits. 
*/ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, , uint, u, 8, 8, 0xFF); -+ VDUP(vector, , uint, u, 16, 4, 0xFFFF); -+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); -+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 8, 16, 0xFF); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+ /* Use -1 shift amount to check cumulative saturation with -+ round_const. */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -1); -+ VDUP(vector_shift, , int, s, 32, 2, -1); -+ VDUP(vector_shift, , int, s, 64, 1, -1); -+ VDUP(vector_shift, q, int, s, 8, 16, -1); -+ VDUP(vector_shift, q, int, s, 16, 8, -1); -+ VDUP(vector_shift, q, int, s, 32, 4, -1); -+ VDUP(vector_shift, q, int, s, 64, 2, -1); ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSHLL_N(int, s, 8, 16, 8, 1); ++ TEST_VSHLL_N(int, s, 16, 32, 4, 1); ++ TEST_VSHLL_N(int, s, 32, 64, 2, 3); ++ TEST_VSHLL_N(uint, u, 8, 16, 8, 2); ++ TEST_VSHLL_N(uint, u, 16, 32, 4, 4); ++ TEST_VSHLL_N(uint, u, 32, 64, 2, 3); + +#undef CMT -+#define CMT " (checking cumulative saturation: shift by -1)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus1, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus1, CMT); -+ -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus1, CMT); -+ 
CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus1, CMT); ++#define CMT "" ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + ++int main (void) ++{ ++ exec_vshll_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshr_n.c +@@ -0,0 +1,95 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Use -3 shift amount to check cumulative saturation with -+ round_const. */ -+ VDUP(vector_shift, , int, s, 8, 8, -3); -+ VDUP(vector_shift, , int, s, 16, 4, -3); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -3); -+ VDUP(vector_shift, q, int, s, 8, 16, -3); -+ VDUP(vector_shift, q, int, s, 16, 8, -3); -+ VDUP(vector_shift, q, int, s, 32, 4, -3); -+ VDUP(vector_shift, q, int, s, 64, 2, -3); ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3c, 0x3c, ++ 0x3d, 0x3d, 0x3d, 0x3d }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7ffffff, 0x7ffffff }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7fffffff }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb, ++ 0xfc, 0xfc, 0xfd, 0xfd, ++ 0xfe, 0xfe, 0xff, 0xff }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, ++ 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c, ++ 0x3d, 0x3d, 0x3d, 0x3d, ++ 0x3e, 0x3e, 0x3e, 0x3e, ++ 0x3f, 0x3f, 0x3f, 0x3f }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, ++ 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7ffffff, 0x7ffffff, ++ 0x7ffffff, 0x7ffffff }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7fffffff, 0x7fffffff }; + -+#undef CMT -+#define CMT " (checking cumulative saturation: shift by -3)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, 
expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus3, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus3, CMT); ++#define TEST_MSG "VSHR_N" ++void exec_vshr_n (void) ++{ ++ /* Basic test: y=vshr_n(x,v), then store the result. */ ++#define TEST_VSHR_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus3, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus3, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ clean_results (); + -+ /* Use large shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 10); -+ VDUP(vector_shift, , int, s, 16, 4, 20); -+ VDUP(vector_shift, , int, s, 32, 2, 40); -+ VDUP(vector_shift, , int, s, 64, 1, 70); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 20); -+ VDUP(vector_shift, q, int, s, 32, 4, 40); -+ VDUP(vector_shift, q, int, s, 64, 2, 70); ++ /* Initialize input "vector" from "buffer". 
*/ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + -+#undef CMT -+#define CMT " (checking cumulative saturation: large shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_sh, CMT); ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSHR_N(, int, s, 8, 8, 1); ++ TEST_VSHR_N(, int, s, 16, 4, 12); ++ TEST_VSHR_N(, int, s, 32, 2, 2); ++ TEST_VSHR_N(, int, s, 64, 1, 32); ++ TEST_VSHR_N(, uint, u, 8, 8, 2); ++ TEST_VSHR_N(, uint, u, 16, 4, 3); ++ TEST_VSHR_N(, uint, u, 32, 2, 5); ++ TEST_VSHR_N(, uint, u, 64, 1, 33); ++ ++ TEST_VSHR_N(q, int, s, 8, 16, 1); ++ TEST_VSHR_N(q, int, s, 16, 8, 12); ++ TEST_VSHR_N(q, int, s, 32, 4, 2); ++ TEST_VSHR_N(q, int, s, 64, 2, 32); ++ TEST_VSHR_N(q, uint, u, 8, 16, 2); ++ TEST_VSHR_N(q, uint, u, 16, 8, 3); ++ TEST_VSHR_N(q, uint, u, 32, 4, 5); ++ TEST_VSHR_N(q, uint, u, 64, 2, 33); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_sh, CMT); ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, 
expected, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++} + ++int main (void) ++{ ++ exec_vshr_n (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshrn_n.c +@@ -0,0 +1,70 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Fill input vector with negative values, to check saturation on -+ limits. */ -+ VDUP(vector, , int, s, 8, 8, 0x80); -+ VDUP(vector, , int, s, 16, 4, 0x8000); -+ VDUP(vector, , int, s, 32, 2, 0x80000000); -+ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); -+ VDUP(vector, q, int, s, 8, 16, 0x80); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++/* Expected results. */ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, ++ 0xfa, 0xfa, 0xfb, 0xfb }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfc, 0xfc, ++ 0xfd, 0xfd, 0xfd, 0xfd }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; + -+ /* Use large shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 10); -+ VDUP(vector_shift, , int, s, 16, 4, 20); -+ VDUP(vector_shift, , int, s, 32, 2, 40); -+ VDUP(vector_shift, , int, s, 64, 1, 70); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 20); -+ VDUP(vector_shift, q, int, s, 32, 4, 40); -+ VDUP(vector_shift, q, int, s, 64, 2, 70); ++#define TEST_MSG "VSHRN_N" ++void exec_vshrn_n (void) ++{ ++ /* Basic test: y=vshrn_n(x,v), then store the result. 
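An illustrative sketch (helper invented for this note): vshrn_n shifts each
double-width lane right (arithmetically for the signed variants) and keeps
only the low half, so 0xfff0 >> 1 == 0xfff8 narrows to the 0xf8 that opens
expected,int,8,8 above.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: shift each 16-bit lane right
     by one and narrow to 8 bits, as TEST_VSHRN_N(int, s, 16, 8, 8, 1)
     does below.  */
  int8x8_t
  shift_right_and_narrow (int16x8_t v)
  {
    return vshrn_n_s16 (v, 1);
  }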
*/ ++#define TEST_VSHRN_N(T1, T2, W, W2, N, V) \ ++ VECT_VAR(vector_res, T1, W2, N) = \ ++ vshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ V); \ ++ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + -+#undef CMT -+#define CMT " (checking cumulative saturation: large shift amount with negative input)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large_sh, CMT); ++ /* vector is twice as large as vector_res. */ ++ DECL_VARIABLE(vector, int, 16, 8); ++ DECL_VARIABLE(vector, int, 32, 4); ++ DECL_VARIABLE(vector, int, 64, 2); ++ DECL_VARIABLE(vector, uint, 16, 8); ++ DECL_VARIABLE(vector, uint, 32, 4); ++ DECL_VARIABLE(vector, uint, 64, 2); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large_sh, CMT); ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, int, 16, 4); ++ DECL_VARIABLE(vector_res, int, 32, 2); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 16, 4); ++ DECL_VARIABLE(vector_res, uint, 32, 2); + ++ clean_results (); + -+ /* Fill input vector with negative and positive values, to check -+ * saturation on limits */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 
0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, int, s, 8, 16, 0x80); -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++ VLOAD(vector, buffer, q, int, s, 16, 8); ++ VLOAD(vector, buffer, q, int, s, 32, 4); ++ VLOAD(vector, buffer, q, int, s, 64, 2); ++ VLOAD(vector, buffer, q, uint, u, 16, 8); ++ VLOAD(vector, buffer, q, uint, u, 32, 4); ++ VLOAD(vector, buffer, q, uint, u, 64, 2); + -+ /* Use large negative shift amount */ -+ VDUP(vector_shift, , int, s, 8, 8, -10); -+ VDUP(vector_shift, , int, s, 16, 4, -20); -+ VDUP(vector_shift, , int, s, 32, 2, -40); -+ VDUP(vector_shift, , int, s, 64, 1, -70); -+ VDUP(vector_shift, q, int, s, 8, 16, -10); -+ VDUP(vector_shift, q, int, s, 16, 8, -20); -+ VDUP(vector_shift, q, int, s, 32, 4, -40); -+ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSHRN_N(int, s, 16, 8, 8, 1); ++ TEST_VSHRN_N(int, s, 32, 16, 4, 1); ++ TEST_VSHRN_N(int, s, 64, 32, 2, 2); ++ TEST_VSHRN_N(uint, u, 16, 8, 8, 2); ++ TEST_VSHRN_N(uint, u, 32, 16, 4, 3); ++ TEST_VSHRN_N(uint, u, 64, 32, 2, 3); + -+#undef CMT -+#define CMT " (checking cumulative saturation: large negative shift amount)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++#define CMT "" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++} + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 
16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++int main (void) ++{ ++ exec_vshrn_n (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc +@@ -120,15 +120,40 @@ void FNNAME (INSN_NAME) (void) + TEST_EXTRA_CHUNK(poly, 16, 8, 1); \ + TEST_EXTRA_CHUNK(float, 32, 4, 1) + ++ /* vshuffle support all vector types except [u]int64x1 and ++ [u]int64x2. */ ++#define CHECK_RESULTS_VSHUFFLE(test_name,EXPECTED,comment) \ ++ { \ ++ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ ++ \ ++ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ ++ CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ ++ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ ++ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ ++ } \ ++ + clean_results (); + + /* Execute the tests. */ + TEST_ALL_VSHUFFLE(INSN_NAME); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "(chunk 0)"); ++ CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)"); + + TEST_ALL_EXTRA_CHUNKS(); +- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "(chunk 1)"); ++ CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)"); + } + + int main (void) +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c +@@ -23,7 +23,6 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x10 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0x50, 0x51, 0x52, 0x53, + 0x50, 0x51, 0x52, 0x53 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xd0, 0xd1, 0xd2, 0xd3, + 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, +@@ -48,8 +47,6 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67 }; + VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3, + 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with max shift amount. 
*/ + VECT_VAR_DECL(expected_max_shift,int,8,8) [] = { 0x70, 0x71, 0x72, 0x73, +@@ -68,7 +65,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,8) [] = { 0x70, 0x71, 0x72, 0x73, + 0x74, 0x75, 0x76, 0x77 }; + VECT_VAR_DECL(expected_max_shift,poly,16,4) [] = { 0x7ff0, 0x7ff1, + 0x7ff2, 0x7ff3 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_max_shift,int,8,16) [] = { 0x70, 0x71, 0x72, 0x73, + 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, +@@ -95,8 +91,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,16) [] = { 0x70, 0x71, 0x72, 0x73, + 0x7c, 0x7d, 0x7e, 0x7f }; + VECT_VAR_DECL(expected_max_shift,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vsXi_n.inc" + +@@ -158,5 +152,23 @@ void vsli_extra(void) + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 7); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 15); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_max_shift, "(max shift amount)"); ++#define COMMENT "(max shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); + } +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsra_n.c +@@ -0,0 +1,117 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + ++/* Expected results. 
*/ ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff3, 0xfffffff4 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff, ++ 0x0, 0x1, 0x2, 0x3, ++ 0x4, 0x5, 0x6, 0x7 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffd, ++ 0xfffffffe, 0xffffffff }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0x5, 0x6, 0x7, 0x8, ++ 0x9, 0xa, 0xb, 0xc, ++ 0xd, 0xe, 0xf, 0x10, ++ 0x11, 0x12, 0x13, 0x14 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, ++ 0x0, 0x1, 0x2, 0x3 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0xfffffff4, ++ 0xfffffff5, 0xfffffff6 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0xfffffffffffffff1 }; + -+ /* Fill input vector with 0, to check saturation in case of large -+ * shift amount */ -+ VDUP(vector, , int, s, 8, 8, 0); -+ VDUP(vector, , int, s, 16, 4, 0); -+ VDUP(vector, , int, s, 32, 2, 0); -+ VDUP(vector, , int, s, 64, 1, 0); -+ VDUP(vector, q, int, s, 8, 16, 0); -+ VDUP(vector, q, int, s, 16, 8, 0); -+ VDUP(vector, q, int, s, 32, 4, 0); -+ VDUP(vector, q, int, s, 64, 2, 0); ++#define TEST_MSG "VSRA_N" ++void exec_vsra_n (void) ++{ ++ /* Basic test: y=vsra_n(x,v), then store the result. 
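An illustrative sketch (helper invented for this note): vsra_n adds the
right-shifted second operand to the first, so lane i of the result is
x[i] + (y[i] >> n).  With x[0] == 0xf0 (-16 as int8), y filled with 0x11
and n == 1, that is -16 + 8 == -8 == 0xf8, the first value in
expected,int,8,8 above.

  #include <arm_neon.h>

  /* Hypothetical helper, illustrative only: accumulate the one-bit
     arithmetic right shift of y into x, per lane, as
     TEST_VSRA_N(, int, s, 8, 8, 1) does below.  */
  int8x8_t
  shift_right_and_accumulate (int8x8_t x, int8x8_t y)
  {
    return vsra_n_s8 (x, y, 1);
  }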
*/ ++#define TEST_VSRA_N(Q, T1, T2, W, N, V) \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N), \ ++ V); \ ++ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + -+ /* Use large shift amount */ -+ VDUP(vector_shift, , int, s, 8, 8, -10); -+ VDUP(vector_shift, , int, s, 16, 4, -20); -+ VDUP(vector_shift, , int, s, 32, 2, -40); -+ VDUP(vector_shift, , int, s, 64, 1, -70); -+ VDUP(vector_shift, q, int, s, 8, 16, -10); -+ VDUP(vector_shift, q, int, s, 16, 8, -20); -+ VDUP(vector_shift, q, int, s, 32, 4, -40); -+ VDUP(vector_shift, q, int, s, 64, 2, -70); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector2); ++ DECL_VARIABLE_ALL_VARIANTS(vector_res); + -+#undef CMT -+#define CMT " (checking cumulative saturation: large negative shift amount with 0 input)" -+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT); -+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT); ++ clean_results (); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT); ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose arbitrary initialization values. 
*/ ++ VDUP(vector2, , int, s, 8, 8, 0x11); ++ VDUP(vector2, , int, s, 16, 4, 0x22); ++ VDUP(vector2, , int, s, 32, 2, 0x33); ++ VDUP(vector2, , int, s, 64, 1, 0x44); ++ VDUP(vector2, , uint, u, 8, 8, 0x55); ++ VDUP(vector2, , uint, u, 16, 4, 0x66); ++ VDUP(vector2, , uint, u, 32, 2, 0x77); ++ VDUP(vector2, , uint, u, 64, 1, 0x88); ++ ++ VDUP(vector2, q, int, s, 8, 16, 0x11); ++ VDUP(vector2, q, int, s, 16, 8, 0x22); ++ VDUP(vector2, q, int, s, 32, 4, 0x33); ++ VDUP(vector2, q, int, s, 64, 2, 0x44); ++ VDUP(vector2, q, uint, u, 8, 16, 0x55); ++ VDUP(vector2, q, uint, u, 16, 8, 0x66); ++ VDUP(vector2, q, uint, u, 32, 4, 0x77); ++ VDUP(vector2, q, uint, u, 64, 2, 0x88); ++ ++ /* Choose shift amount arbitrarily. */ ++ TEST_VSRA_N(, int, s, 8, 8, 1); ++ TEST_VSRA_N(, int, s, 16, 4, 12); ++ TEST_VSRA_N(, int, s, 32, 2, 2); ++ TEST_VSRA_N(, int, s, 64, 1, 32); ++ TEST_VSRA_N(, uint, u, 8, 8, 2); ++ TEST_VSRA_N(, uint, u, 16, 4, 3); ++ TEST_VSRA_N(, uint, u, 32, 2, 5); ++ TEST_VSRA_N(, uint, u, 64, 1, 33); ++ ++ TEST_VSRA_N(q, int, s, 8, 16, 1); ++ TEST_VSRA_N(q, int, s, 16, 8, 12); ++ TEST_VSRA_N(q, int, s, 32, 4, 2); ++ TEST_VSRA_N(q, int, s, 64, 2, 32); ++ TEST_VSRA_N(q, uint, u, 8, 16, 2); ++ TEST_VSRA_N(q, uint, u, 16, 8, 3); ++ TEST_VSRA_N(q, uint, u, 32, 4, 5); ++ TEST_VSRA_N(q, uint, u, 64, 2, 33); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); ++ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); +} + +int main (void) +{ -+ exec_vqrshl (); ++ exec_vsra_n (); + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c +@@ -23,7 +23,6 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xe000000000000000 }; + VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5, + 0xc5, 0xc5, 0xc5, 0xc5 }; + VECT_VAR_DECL(expected,poly,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xf7, 0xf7, 0xf7, 0xf7, + 0xf7, 0xf7, 0xf7, 0xf7, + 0xff, 0xff, 0xff, 0xff, +@@ -50,8 +49,6 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1, + 0xe1, 0xe1, 0xe1, 0xe1 }; + VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, + 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results with max shift amount. 
*/ + VECT_VAR_DECL(expected_max_shift,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +@@ -70,7 +67,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected_max_shift,poly,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff3 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected_max_shift,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -97,8 +93,6 @@ VECT_VAR_DECL(expected_max_shift,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xfc, 0xfd, 0xfe, 0xff }; + VECT_VAR_DECL(expected_max_shift,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +-VECT_VAR_DECL(expected_max_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vsXi_n.inc" + +@@ -160,5 +154,23 @@ void vsri_extra(void) + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 8); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 16); + +- CHECK_RESULTS_NAMED (TEST_MSG, expected_max_shift, "(max shift amount)"); ++#define COMMENT "(max shift amount)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); + } --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_n.c -@@ -0,0 +1,174 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c +@@ -0,0 +1,93 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; -+ +/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, -+ 0xfa, 0xfb, 0xfb, 0xfc }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x33333333 }; ++VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x33333333 }; ++VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; ++VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 }; ++VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff5, 0x3333, 0x3333, 0x3333, ++ 0x3333, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0x33333333, ++ 0x33333333, 0x33333333 }; ++VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x3333333333333333 }; ++VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, ++ 0x3333, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0x33333333, ++ 0x33333333, 0x33333333 }; ++VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, ++ 0x3333333333333333 }; ++VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33, ++ 0x33, 0x33, 0x33, 0x33 }; ++VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, ++ 0x3333, 0x3333, 0x3333, 0x3333 }; ++VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333, ++ 0x33333333, 0x33333333 }; ++ ++#define TEST_MSG "VST1_LANE/VST1_LANEQ" ++void exec_vst1_lane (void) ++{ ++#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \ ++ VECT_VAR(vector, T1, W, N) = \ ++ vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ ++ vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector, T1, W, N), L) + -+/* Expected values of cumulative_saturation flag with shift by 3. */ -+int VECT_VAR(expected_cumulative_sat_sh3,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh3,uint,64,2) = 1; ++ DECL_VARIABLE_ALL_VARIANTS(vector); + -+/* Expected results with shift by 3. 
*/ -+VECT_VAR_DECL(expected_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_sh3,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ clean_results (); + -+/* Expected values of cumulative_saturation flag with shift by max -+ amount. */ -+int VECT_VAR(expected_cumulative_sat_shmax,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_shmax,uint,64,2) = 1; ++ /* Choose lane arbitrarily. */ ++ TEST_VST1_LANE(, int, s, 8, 8, 7); ++ TEST_VST1_LANE(, int, s, 16, 4, 3); ++ TEST_VST1_LANE(, int, s, 32, 2, 1); ++ TEST_VST1_LANE(, int, s, 64, 1, 0); ++ TEST_VST1_LANE(, uint, u, 8, 8, 6); ++ TEST_VST1_LANE(, uint, u, 16, 4, 2); ++ TEST_VST1_LANE(, uint, u, 32, 2, 0); ++ TEST_VST1_LANE(, uint, u, 64, 1, 0); ++ TEST_VST1_LANE(, poly, p, 8, 8, 6); ++ TEST_VST1_LANE(, poly, p, 16, 4, 2); ++ TEST_VST1_LANE(, float, f, 32, 2, 1); ++ ++ TEST_VST1_LANE(q, int, s, 8, 16, 15); ++ TEST_VST1_LANE(q, int, s, 16, 8, 5); ++ TEST_VST1_LANE(q, int, s, 32, 4, 1); ++ TEST_VST1_LANE(q, int, s, 64, 2, 1); ++ TEST_VST1_LANE(q, uint, u, 8, 16, 10); ++ TEST_VST1_LANE(q, uint, u, 16, 8, 4); ++ TEST_VST1_LANE(q, uint, u, 32, 4, 3); ++ TEST_VST1_LANE(q, uint, u, 64, 2, 0); ++ TEST_VST1_LANE(q, poly, p, 8, 16, 10); ++ TEST_VST1_LANE(q, poly, p, 16, 8, 4); ++ TEST_VST1_LANE(q, float, f, 32, 4, 1); + -+/* Expected results with shift by max amount. 
*/ -+VECT_VAR_DECL(expected_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_shmax,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_shmax,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ CHECK_RESULTS(TEST_MSG, ""); ++} + -+#define INSN vqrshrn_n -+#define TEST_MSG "VQRSHRN_N" ++int main (void) ++{ ++ exec_vst1_lane (); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst2_lane_f32 (float32_t * p, float32x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst2_lane_f64 (float64_t * p, float64x1x2_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_p8_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+FNNAME (INSN) ++void ++f_vst2_lane_p8 (poly8_t * p, poly8x8x2_t v) +{ -+ /* Basic test: y=vqrshrn_n(x,v), then store the result. 
*/ -+#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ -+ VECT_VAR(vector_res, T1, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ -+ VECT_VAR(vector_res, T1, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++void ++f_vst2_lane_s16 (int16_t * p, int16x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* vector is twice as large as vector_res. 
*/ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); -+ DECL_VARIABLE(vector, uint, 16, 8); -+ DECL_VARIABLE(vector, uint, 32, 4); -+ DECL_VARIABLE(vector, uint, 64, 2); ++void ++f_vst2_lane_s32 (int32_t * p, int32x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ DECL_VARIABLE(vector_res, int, 8, 8); -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ clean_results (); ++void ++f_vst2_lane_s64 (int64_t * p, int64x1x2_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); -+ VLOAD(vector, buffer, q, int, s, 64, 2); -+ VLOAD(vector, buffer, q, uint, u, 16, 8); -+ VLOAD(vector, buffer, q, uint, u, 32, 4); -+ VLOAD(vector, buffer, q, uint, u, 64, 2); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Choose shift amount arbitrarily. 
*/ -+#define CMT "" -+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); ++void ++f_vst2_lane_s8 (int8_t * p, int8x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst2_lane_u16 (uint16_t * p, uint16x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Another set of tests, shifting max value by 3. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#undef CMT -+#define CMT " (check saturation: shift by 3)" -+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT); -+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT); ++void ++f_vst2_lane_u32 (uint32_t * p, uint32x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh3, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh3, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh3, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh3, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh3, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh3, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + ++void ++f_vst2_lane_u64 (uint64_t * p, uint64x1x2_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Shift by max amount. 
*/ -+#undef CMT -+#define CMT " (check saturation: shift by max)" -+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT); -+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_shmax, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_shmax, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_shmax, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_shmax, CMT); ++void ++f_vst2_lane_u8 (uint8_t * p, uint8x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2_lane_u8 (p, v, -1); ++ return; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+int main (void) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst2q_lane_f32 (float32_t * p, float32x4x2_t v) +{ -+ exec_vqrshrn_n (); -+ return 0; ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f32 (p, v, -1); ++ return; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_n.c -@@ -0,0 +1,189 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ +#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected values of cumulative_saturation flag with negative unput. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; + -+/* Expected results with negative input. */ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by 1. 
*/ -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++void ++f_vst2q_lane_f64 (float64_t * p, float64x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected results with max input value shifted by 1. */ -+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by max amount. */ -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; ++void ++f_vst2q_lane_p8 (poly8_t * p, poly8x16x2_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected results with max input value shifted by max amount. */ -+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x80000000, 0x80000000 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with min input value -+ shifted by max amount. */ -+int VECT_VAR(expected_cumulative_sat_min_shmax,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_min_shmax,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_min_shmax,int,64,2) = 1; ++void ++f_vst2q_lane_s16 (int16_t * p, int16x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected results with min input value shifted by max amount. 
*/ -+VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x0, 0x0 }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected values of cumulative_saturation flag with inputs in usual -+ range. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++void ++f_vst2q_lane_s32 (int32_t * p, int32x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected results with inputs in usual range. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49, -+ 0x49, 0x49, 0x49, 0x49 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf }; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#define INSN vqrshrun_n -+#define TEST_MSG "VQRSHRUN_N" ++void ++f_vst2q_lane_s64 (int64_t * p, int64x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+FNNAME (INSN) ++void ++f_vst2q_lane_s8 (int8_t * p, int8x16x2_t v) +{ -+ /* Basic test: y=vqrshrun_n(x,v), then store the result. 
*/ -+#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ -+ VECT_VAR(vector_res, uint, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ -+ VECT_VAR(vector_res, uint, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++void ++f_vst2q_lane_u16 (uint16_t * p, uint16x8x2_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* vector is twice as large as vector_res. */ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); ++void ++f_vst2q_lane_u32 (uint32_t * p, uint32x4x2_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ clean_results (); ++void ++f_vst2q_lane_u64 (uint64_t * p, uint64x2x2_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Fill input vector with negative values, to check saturation on -+ limits. 
*/ -+ VDUP(vector, q, int, s, 16, 8, -2); -+ VDUP(vector, q, int, s, 32, 4, -3); -+ VDUP(vector, q, int, s, 64, 2, -4); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* Choose shift amount arbitrarily. */ -+#define CMT " (negative input)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); ++void ++f_vst2q_lane_u8 (uint8_t * p, uint8x16x2_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst2q_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst3_lane_f32 (float32_t * p, float32x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Fill input vector with max value, to check saturation on -+ limits. */ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* shift by 1. 
*/ -+#undef CMT -+#define CMT " (check cumulative saturation: shift by 1)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT); ++void ++f_vst3_lane_f64 (float64_t * p, float64x1x3_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_p8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst3_lane_p8 (poly8_t * p, poly8x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* shift by max. */ -+#undef CMT -+#define CMT " (check cumulative saturation: shift by max, positive input)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); ++void ++f_vst3_lane_s16 (int16_t * p, int16x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Fill input vector with min value, to check saturation on limits. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x8000); -+ VDUP(vector, q, int, s, 32, 4, 0x80000000); -+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); ++void ++f_vst3_lane_s32 (int32_t * p, int32x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* shift by max */ -+#undef CMT -+#define CMT " (check cumulative saturation: shift by max, negative input)" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_min_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_min_shmax, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_min_shmax, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT); ++void ++f_vst3_lane_s64 (int64_t * p, int64x1x3_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Fill input vector with positive values, to check normal case. */ -+ VDUP(vector, q, int, s, 16, 8, 0x1234); -+ VDUP(vector, q, int, s, 32, 4, 0x87654321); -+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); ++void ++f_vst3_lane_s8 (int8_t * p, int8x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* shift arbitrary amount. 
*/ -+#undef CMT -+#define CMT "" -+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT); -+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT); -+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++void ++f_vst3_lane_u16 (uint16_t * p, uint16x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u16 (p, v, -1); ++ return; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+int main (void) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst3_lane_u32 (uint32_t * p, uint32x2x3_t v) +{ -+ exec_vqrshrun_n (); -+ return 0; ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u32 (p, v, -1); ++ return; +} --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl.c -@@ -0,0 +1,829 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ +#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag with input=0. */ -+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with input=0. 
*/ -+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 }; ++void ++f_vst3_lane_u64 (uint64_t * p, uint64x1x3_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_saturation flag with input=0 and -+ negative shift amount. */ -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results with input=0 and negative shift amount. 
*/ -+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 }; ++void ++f_vst3_lane_u8 (uint8_t * p, uint8x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, -+ 0xe8, 0xea, 0xec, 0xee }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; -+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, -+ 0x8000, 0x8000, 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++void ++f_vst3q_lane_f32 (float32_t * p, float32x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with negative shift -+ amount. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with negative shift amount. 
*/ -+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, -+ 0xfa, 0xfa, 0xfb, 0xfb }; -+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc }; -+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, -+ 0x7a, 0x7a, 0x7b, 0x7b }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; -+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff }; -+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, -+ 0x1, 0x1, 0x1, 0x1, -+ 0x1, 0x1, 0x1, 0x1, -+ 0x1, 0x1, 0x1, 0x1 }; -+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f, -+ 0x1f, 0x1f, 0x1f, 0x1f }; -+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x7ffff, 0x7ffff, -+ 0x7ffff, 0x7ffff }; -+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff }; ++void ++f_vst3q_lane_f64 (float64_t * p, float64x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with negative -+ input and large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with negative input and large shift amount. 
*/ -+VECT_VAR_DECL(expected_neg_large,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large,int,16,4) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large,int,32,2) [] = { 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large,int,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_neg_large,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80, -+ 0x80, 0x80, 0x80, 0x80 }; -+VECT_VAR_DECL(expected_neg_large,int,16,8) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_neg_large,int,32,4) [] = { 0x80000000, 0x80000000, -+ 0x80000000, 0x80000000 }; -+VECT_VAR_DECL(expected_neg_large,int,64,2) [] = { 0x8000000000000000, -+ 0x8000000000000000 }; -+VECT_VAR_DECL(expected_neg_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_neg_large,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_neg_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_neg_large,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++void ++f_vst3q_lane_p8 (poly8_t * p, poly8x16x3_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst3q_lane_s16 (int16_t * p, int16x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst3q_lane_s32 (int32_t * p, int32x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst3q_lane_s64 (int64_t * p, int64x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ 
++ vst3q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with max input -+ and shift by -1. */ -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,2) = 0; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+/* Expected results with max input and shift by -1. */ -+VECT_VAR_DECL(expected_max_minus1,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f }; -+VECT_VAR_DECL(expected_max_minus1,int,16,4) [] = { 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff }; -+VECT_VAR_DECL(expected_max_minus1,int,32,2) [] = { 0x3fffffff, 0x3fffffff }; -+VECT_VAR_DECL(expected_max_minus1,int,64,1) [] = { 0x3fffffffffffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_minus1,uint,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_minus1,uint,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max_minus1,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f, -+ 0x3f, 0x3f, 0x3f, 0x3f }; -+VECT_VAR_DECL(expected_max_minus1,int,16,8) [] = { 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff, -+ 0x3fff, 0x3fff }; -+VECT_VAR_DECL(expected_max_minus1,int,32,4) [] = { 0x3fffffff, 0x3fffffff, -+ 0x3fffffff, 0x3fffffff }; -+VECT_VAR_DECL(expected_max_minus1,int,64,2) [] = { 0x3fffffffffffffff, -+ 0x3fffffffffffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_minus1,uint,16,8) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_minus1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_minus1,uint,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; ++void ++f_vst3q_lane_s8 (int8_t * p, int8x16x3_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of 
range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with max input -+ and large shift amount. */ -+int VECT_VAR(expected_cumulative_sat_max_large,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results with max input and large shift amount. */ -+VECT_VAR_DECL(expected_max_large,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_large,int,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_large,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_large,int,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_large,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_large,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_max_large,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_large,int,16,8) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_large,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_large,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_large,uint,16,8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_large,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_large,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++void ++f_vst3q_lane_u16 (uint16_t * p, uint16x8x3_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ 
b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+/* Expected values of cumulative_sat_saturation flag with saturation -+ on 64-bits values. */ -+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 1; ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+/* Expected results with saturation on 64-bits values.. */ -+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0x8000000000000000 }; -+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; ++void ++f_vst3q_lane_u32 (uint32_t * p, uint32x4x3_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+#define INSN vqshl -+#define TEST_MSG "VQSHL/VQSHLQ" ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++void ++f_vst3q_lane_u64 (uint64_t * p, uint64x2x3_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+FNNAME (INSN) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst3q_lane_u8 (uint8_t * p, uint8x16x3_t v) +{ -+ /* Basic test: v3=vqshl(v1,v2), then store the result. 
*/ -+#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector_shift, T3, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst3q_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#define TEST_VQSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++void ++f_vst4_lane_f32 (float32_t * p, float32x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++void ++f_vst4_lane_f64 (float64_t * p, float64x1x4_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_p8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ clean_results (); ++void ++f_vst4_lane_p8 (poly8_t * p, poly8x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_p8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Fill input vector with 0, to check saturation on limits. 
*/ -+ VDUP(vector, , int, s, 8, 8, 0); -+ VDUP(vector, , int, s, 16, 4, 0); -+ VDUP(vector, , int, s, 32, 2, 0); -+ VDUP(vector, , int, s, 64, 1, 0); -+ VDUP(vector, , uint, u, 8, 8, 0); -+ VDUP(vector, , uint, u, 16, 4, 0); -+ VDUP(vector, , uint, u, 32, 2, 0); -+ VDUP(vector, , uint, u, 64, 1, 0); -+ VDUP(vector, q, int, s, 8, 16, 0); -+ VDUP(vector, q, int, s, 16, 8, 0); -+ VDUP(vector, q, int, s, 32, 4, 0); -+ VDUP(vector, q, int, s, 64, 2, 0); -+ VDUP(vector, q, uint, u, 8, 16, 0); -+ VDUP(vector, q, uint, u, 16, 8, 0); -+ VDUP(vector, q, uint, u, 32, 4, 0); -+ VDUP(vector, q, uint, u, 64, 2, 0); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst4_lane_s16 (int16_t * p, int16x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst4_lane_s32 (int32_t * p, int32x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst4_lane_s64 (int64_t * p, int64x1x4_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Choose init value arbitrarily, will be used as shift amount */ -+ /* Use values equal or one-less-than the type width to check -+ behaviour on limits. */ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* 64-bits vectors first. */ -+ /* Shift 8-bits lanes by 7... */ -+ VDUP(vector_shift, , int, s, 8, 8, 7); -+ /* ... except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */ -+ VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6); -+ VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8); -+ VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9); ++void ++f_vst4_lane_s8 (int8_t * p, int8x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Shift 16-bits lanes by 15... */ -+ VDUP(vector_shift, , int, s, 16, 4, 15); -+ /* ... except: lane 0 (by 14), lane 1 (by 16), and lane 2 (by 17). 
*/ -+ VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14); -+ VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16); -+ VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Shift 32-bits lanes by 31... */ -+ VDUP(vector_shift, , int, s, 32, 2, 31); -+ /* ... except lane 1 (by 30). */ -+ VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30); ++void ++f_vst4_lane_u16 (uint16_t * p, uint16x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u16 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Shift 64 bits lane by 63. */ -+ VDUP(vector_shift, , int, s, 64, 1, 63); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* 128-bits vectors. */ -+ /* Shift 8-bits lanes by 8. */ -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ /* Shift 16-bits lanes by 16. */ -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ /* Shift 32-bits lanes by 32... */ -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ /* ... except lane 1 (by 33). */ -+ VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33); ++void ++f_vst4_lane_u32 (uint32_t * p, uint32x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u32 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Shift 64-bits lanes by 64... */ -+ VDUP(vector_shift, q, int, s, 64, 2, 64); -+ /* ... except lane 1 (by 62). 
*/ -+ VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#define CMT " (with input = 0)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT); ++void ++f_vst4_lane_u64 (uint64_t * p, uint64x1x4_t v) ++{ ++ /* { dg-error "lane 1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u64 (p, v, 1); ++ /* { dg-error "lane -1 out of range 0 - 0" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u8_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + ++void ++f_vst4_lane_u8 (uint8_t * p, uint8x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u8 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ /* Use negative shift amounts */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ 
VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+#undef CMT -+#define CMT " (input 0 and negative shift amount)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT); ++void ++f_vst4q_lane_f32 (float32_t * p, float32x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* Test again, with predefined input values. 
*/ -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++void ++f_vst4q_lane_f64 (float64_t * p, float64x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_f64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_p8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Choose init value arbitrarily, will be used as shift amount. */ -+ VDUP(vector_shift, , int, s, 8, 8, 1); -+ VDUP(vector_shift, , int, s, 16, 4, 3); -+ VDUP(vector_shift, , int, s, 32, 2, 8); -+ VDUP(vector_shift, , int, s, 64, 1, -3); -+ VDUP(vector_shift, q, int, s, 8, 16, 10); -+ VDUP(vector_shift, q, int, s, 16, 8, 12); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 63); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#undef CMT -+#define CMT "" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT); ++void ++f_vst4q_lane_p8 (poly8_t * p, poly8x16x4_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_p8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_p8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++/* { dg-do compile } */ ++/* { 
dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++ ++void ++f_vst4q_lane_s16 (int16_t * p, int16x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Use negative shift amounts */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -2); -+ VDUP(vector_shift, , int, s, 32, 2, -3); -+ VDUP(vector_shift, , int, s, 64, 1, -4); -+ VDUP(vector_shift, q, int, s, 8, 16, -7); -+ VDUP(vector_shift, q, int, s, 16, 8, -11); -+ VDUP(vector_shift, q, int, s, 32, 4, -13); -+ VDUP(vector_shift, q, int, s, 64, 2, -20); ++void ++f_vst4q_lane_s32 (int32_t * p, int32x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s64_indices_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ ++ ++void ++f_vst4q_lane_s64 (int64_t * p, int64x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+#undef CMT -+#define CMT " (negative shift amount)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT); -+ 
CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++void ++f_vst4q_lane_s8 (int8_t * p, int8x16x4_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_s8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u16_indices_1.c +@@ -0,0 +1,15 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ /* Use large shift amounts. */ -+ VDUP(vector_shift, , int, s, 8, 8, 8); -+ VDUP(vector_shift, , int, s, 16, 4, 16); -+ VDUP(vector_shift, , int, s, 32, 2, 32); -+ VDUP(vector_shift, , int, s, 64, 1, 64); -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++void ++f_vst4q_lane_u16 (uint16_t * p, uint16x8x4_t v) ++{ ++ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u16 (p, v, 8); ++ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u16 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u32_indices_1.c +@@ -0,0 +1,15 @@ ++#include + -+#undef CMT -+#define CMT " (large shift amount, negative input)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, 
expected_cumulative_sat_neg_large, CMT); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large, CMT); ++void ++f_vst4q_lane_u32 (uint32_t * p, uint32x4x4_t v) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u32 (p, v, 4); ++ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u32 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u64_indices_1.c +@@ -0,0 +1,16 @@ ++#include + ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+ /* Fill input vector with max value, to check saturation on limits */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, , uint, u, 8, 8, 0xFF); -+ VDUP(vector, , uint, u, 16, 4, 0xFFFF); -+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); -+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 8, 16, 0xFF); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++void ++f_vst4q_lane_u64 (uint64_t * p, uint64x2x4_t v) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u64 (p, v, 2); ++ /* { dg-error "lane -1 out of range 0 - 1" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u64 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u8_indices_1.c +@@ -0,0 +1,16 @@ ++#include + -+ /* Shift by -1 */ -+ VDUP(vector_shift, , int, s, 8, 8, -1); -+ VDUP(vector_shift, , int, s, 16, 4, -1); -+ VDUP(vector_shift, , int, s, 32, 2, -1); -+ VDUP(vector_shift, , int, s, 64, 1, -1); -+ VDUP(vector_shift, q, int, s, 8, 16, -1); -+ VDUP(vector_shift, q, int, s, 16, 8, -1); -+ VDUP(vector_shift, q, int, s, 32, 4, -1); -+ VDUP(vector_shift, q, int, s, 64, 2, -1); ++/* { dg-do compile } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { 
dg-excess-errors "" { xfail arm*-*-* } } */ ++/* { dg-skip-if "" { arm*-*-* } } */ + -+#undef CMT -+#define CMT " (max input, shift by -1)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_minus1, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_minus1, CMT); ++void ++f_vst4q_lane_u8 (uint8_t * p, uint8x16x4_t v) ++{ ++ /* { dg-error "lane 16 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u8 (p, v, 16); ++ /* { dg-error "lane -1 out of range 0 - 15" "" { xfail arm*-*-* } 0 } */ ++ vst4q_lane_u8 (p, v, -1); ++ return; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c +@@ -0,0 +1,578 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_minus1, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_minus1, CMT); ++/* Expected results for vst2, chunk 0. 
*/ ++VECT_VAR_DECL(expected_st2_0,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st2_0,uint,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0x0, 0x0 }; + ++/* Expected results for vst2, chunk 1. */ ++VECT_VAR_DECL(expected_st2_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st2_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ /* Use large shift amounts */ -+ VDUP(vector_shift, , int, s, 8, 8, 8); -+ VDUP(vector_shift, , int, s, 16, 4, 16); -+ VDUP(vector_shift, , int, s, 32, 2, 32); -+ VDUP(vector_shift, , int, s, 64, 1, 64); -+ VDUP(vector_shift, q, int, s, 8, 16, 8); -+ VDUP(vector_shift, q, int, s, 16, 8, 16); -+ VDUP(vector_shift, q, int, s, 32, 4, 32); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++/* Expected results for vst3, chunk 0. 
*/ ++VECT_VAR_DECL(expected_st3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0x0 }; + -+#undef CMT -+#define CMT " (max input, large shift amount)" -+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_large, CMT); -+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_large, CMT); ++/* Expected results for vst3, chunk 1. 
*/ ++VECT_VAR_DECL(expected_st3_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,32,2) [] = { 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_large, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_large, CMT); ++/* Expected results for vst3, chunk 2. */ ++VECT_VAR_DECL(expected_st3_2,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st3_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++ ++/* Expected results for vst4, chunk 0. 
*/ ++VECT_VAR_DECL(expected_st4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_st4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; ++VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; ++VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; ++VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, ++ 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, ++ 0xc1600000, 0xc1500000 }; + ++/* Expected results for vst4, chunk 1. */ ++VECT_VAR_DECL(expected_st4_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; ++VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; ++VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ /* Check 64 bits saturation. */ -+ VDUP(vector, , int, s, 64, 1, -10); -+ VDUP(vector_shift, , int, s, 64, 1, 64); -+ VDUP(vector, q, int, s, 64, 2, 10); -+ VDUP(vector_shift, q, int, s, 64, 2, 64); ++/* Expected results for vst4, chunk 2. 
*/ ++VECT_VAR_DECL(expected_st4_2,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+#undef CMT -+#define CMT " (check saturation on 64 bits)" -+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_64, CMT); -+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_64, CMT); ++/* Expected results for vst4, chunk 3. */ ++VECT_VAR_DECL(expected_st4_3,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, ++ 0x0, 0x0, 0x0, 0x0 }; ++VECT_VAR_DECL(expected_st4_3,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, CMT); -+} ++/* Declare additional input buffers as needed. */ ++/* Input buffers for vld2_lane. */ ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); ++VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2); ++ ++/* Input buffers for vld3_lane. 
*/ ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); ++VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3); ++ ++/* Input buffers for vld4_lane. */ ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); ++VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4); ++ ++void exec_vstX_lane (void) ++{ ++ /* In this case, input variables are arrays of vectors. */ ++#define DECL_VSTX_LANE(T1, W, N, X) \ ++ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ ++ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ ++ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] ++ ++ /* We need to use a temporary result buffer (result_bis), because ++ the one used for other tests is not large enough. A subset of the ++ result data is moved from result_bis to result, and it is this ++ subset which is used to check the actual behaviour. The next ++ macro enables to move another chunk of data from result_bis to ++ result. */ ++ /* We also use another extra input buffer (buffer_src), which we ++ fill with 0xAA, and which it used to load a vector from which we ++ read a given lane. */ ++#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \ ++ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ ++ sizeof(VECT_VAR(buffer_src, T1, W, N))); \ ++ memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \ ++ sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \ ++ \ ++ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ ++ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ ++ \ ++ VECT_ARRAY_VAR(vector, T1, W, N, X) = \ ++ /* Use dedicated init buffer, of size X. */ \ ++ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ ++ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ ++ L); \ ++ vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ ++ VECT_ARRAY_VAR(vector, T1, W, N, X), \ ++ L); \ ++ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ ++ sizeof(VECT_VAR(result, T1, W, N))); ++ ++ /* Overwrite "result" with the contents of "result_bis"[Y]. */ ++#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ ++ memcpy(VECT_VAR(result, T1, W, N), \ ++ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ ++ sizeof(VECT_VAR(result, T1, W, N))); ++ ++ /* We need all variants in 64 bits, but there is no 64x2 variant, ++ nor 128 bits vectors of int8/uint8/poly8. 
*/ ++#define DECL_ALL_VSTX_LANE(X) \ ++ DECL_VSTX_LANE(int, 8, 8, X); \ ++ DECL_VSTX_LANE(int, 16, 4, X); \ ++ DECL_VSTX_LANE(int, 32, 2, X); \ ++ DECL_VSTX_LANE(uint, 8, 8, X); \ ++ DECL_VSTX_LANE(uint, 16, 4, X); \ ++ DECL_VSTX_LANE(uint, 32, 2, X); \ ++ DECL_VSTX_LANE(poly, 8, 8, X); \ ++ DECL_VSTX_LANE(poly, 16, 4, X); \ ++ DECL_VSTX_LANE(float, 32, 2, X); \ ++ DECL_VSTX_LANE(int, 16, 8, X); \ ++ DECL_VSTX_LANE(int, 32, 4, X); \ ++ DECL_VSTX_LANE(uint, 16, 8, X); \ ++ DECL_VSTX_LANE(uint, 32, 4, X); \ ++ DECL_VSTX_LANE(poly, 16, 8, X); \ ++ DECL_VSTX_LANE(float, 32, 4, X) ++ ++#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] ++ ++ /* Use the same lanes regardless of the size of the array (X), for ++ simplicity. */ ++#define TEST_ALL_VSTX_LANE(X) \ ++ TEST_VSTX_LANE(, int, s, 8, 8, X, 7); \ ++ TEST_VSTX_LANE(, int, s, 16, 4, X, 2); \ ++ TEST_VSTX_LANE(, int, s, 32, 2, X, 0); \ ++ TEST_VSTX_LANE(, float, f, 32, 2, X, 0); \ ++ TEST_VSTX_LANE(, uint, u, 8, 8, X, 4); \ ++ TEST_VSTX_LANE(, uint, u, 16, 4, X, 3); \ ++ TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \ ++ TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \ ++ TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \ ++ TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \ ++ TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \ ++ TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \ ++ TEST_VSTX_LANE(q, uint, u, 32, 4, X, 0); \ ++ TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \ ++ TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) ++ ++#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ ++ TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ ++ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ ++ TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ ++ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) ++ ++ /* Declare the temporary buffers / variables. */ ++ DECL_ALL_VSTX_LANE(2); ++ DECL_ALL_VSTX_LANE(3); ++ DECL_ALL_VSTX_LANE(4); ++ ++ /* Define dummy input arrays, large enough for x4 vectors. */ ++ DUMMY_ARRAY(buffer_src, int, 8, 8, 4); ++ DUMMY_ARRAY(buffer_src, int, 16, 4, 4); ++ DUMMY_ARRAY(buffer_src, int, 32, 2, 4); ++ DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); ++ DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); ++ DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); ++ DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); ++ DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); ++ DUMMY_ARRAY(buffer_src, float, 32, 2, 4); ++ DUMMY_ARRAY(buffer_src, int, 16, 8, 4); ++ DUMMY_ARRAY(buffer_src, int, 32, 4, 4); ++ DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); ++ DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); ++ DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); ++ DUMMY_ARRAY(buffer_src, float, 32, 4, 4); + -+int main (void) -+{ -+ exec_vqshl (); -+ return 0; -+} ---- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl_n.c -@@ -0,0 +1,234 @@ -+#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" ++ /* Check vst2_lane/vst2q_lane. */ ++ clean_results (); ++#define TEST_MSG "VST2_LANE/VST2Q_LANE" ++ TEST_ALL_VSTX_LANE(2); + -+/* Expected values of cumulative_saturation flag. 
*/ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++#define CMT " (chunk 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_0, CMT); + -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, -+ 0xd0, 0xd4, 0xd8, 0xdc }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, -+ 0xd0, 0xd4, 0xd8, 0xdc, -+ 0xe0, 0xe4, 0xe8, 0xec, -+ 0xf0, 0xf4, 0xf8, 0xfc }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, -+ 0xffe8, 0xffea, 0xffec, 0xffee }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, -+ 0xffffffe4, 0xffffffe6 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ TEST_ALL_EXTRA_CHUNKS(2, 1); ++#undef CMT ++#define CMT " chunk 1" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_1, CMT); + -+/* Expected values of cumulative_saturation flag with max positive input. 
*/ -+int VECT_VAR(expected_cumulative_sat_max,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max,uint,64,2) = 1; + -+/* Expected results with max positive input. */ -+VECT_VAR_DECL(expected_max,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max,int,64,1) [] = { 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_max,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max,int,32,4) [] = { 0x7fffffff, 0x7fffffff, -+ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max,int,64,2) [] = { 0x7fffffffffffffff, -+ 0x7fffffffffffffff }; -+VECT_VAR_DECL(expected_max,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ /* Check vst3_lane/vst3q_lane. 
*/ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VST3_LANE/VST3Q_LANE" ++ TEST_ALL_VSTX_LANE(3); + -+#define INSN vqshl -+#define TEST_MSG "VQSHL_N/VQSHLQ_N" ++#undef CMT ++#define CMT " (chunk 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_0, CMT); + -+#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ TEST_ALL_EXTRA_CHUNKS(3, 1); + -+FNNAME (INSN) -+{ -+ /* Basic test: v2=vqshl_n(v1,v), then store the result. */ -+#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#undef CMT ++#define CMT " (chunk 1)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_1, CMT); + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ TEST_ALL_EXTRA_CHUNKS(3, 2); + -+#define TEST_VQSHL_N(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++#undef CMT ++#define CMT " (chunk 2)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, 
poly, 8, 8, PRIx8, expected_st3_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_2, CMT); + -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); + ++ /* Check vst4_lane/vst4q_lane. */ + clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VST4_LANE/VST4Q_LANE" ++ TEST_ALL_VSTX_LANE(4); + -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+ -+ /* Choose shift amount arbitrarily. */ -+#define CMT "" -+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat, CMT); -+ -+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat, CMT); ++#undef CMT ++#define CMT " (chunk 0)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_0, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_0, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_0, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_0, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); 
-+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++ TEST_ALL_EXTRA_CHUNKS(4, 1); + ++#undef CMT ++#define CMT " (chunk 1)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_1, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_1, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_1, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_1, CMT); + -+ /* Fill input vector with max value, to check saturation on limits. */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, , uint, u, 8, 8, 0xFF); -+ VDUP(vector, , uint, u, 16, 4, 0xFFFF); -+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); -+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 8, 16, 0xFF); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++ TEST_ALL_EXTRA_CHUNKS(4, 2); + +#undef CMT -+#define CMT " (with max input)" -+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat_max, CMT); -+ -+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat_max, CMT); -+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat_max, CMT); ++#define CMT " (chunk 2)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_2, CMT); ++ 
CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_2, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_2, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_2, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_2, CMT); + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max, CMT); -+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max, CMT); ++ TEST_ALL_EXTRA_CHUNKS(4, 3); ++ ++#undef CMT ++#define CMT " (chunk 3)" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT); ++ CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_3, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_3, CMT); ++ CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_3, CMT); ++ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_3, CMT); +} + +int main (void) +{ -+ exec_vqshl_n (); ++ exec_vstX_lane (); + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c +@@ -18,10 +18,6 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xdc, 0xdd, 0xde, 0xdf, + VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffd2, 0xffd3, 0xffd4, 0xffd5 }; + VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffc8, 0xffffffc9 }; + VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffee }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; + VECT_VAR_DECL(expected,int,8,16) [] = { 0xfa, 0xfb, 0xfc, 0xfd, + 0xfe, 0xff, 0x0, 0x1, + 0x2, 0x3, 
0x4, 0x5, +@@ -41,14 +37,6 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffb9, 0xffffffba, + 0xffffffbb, 0xffffffbc }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed, + 0xffffffffffffffee }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + /* Expected results for float32 variants. Needs to be separated since + the generic test function does not test floating-point +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubl.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubl.c +@@ -6,43 +6,13 @@ + #define TEST_MSG "VSUBL" + + /* Expected results. */ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, + 0x1, 0x2, 0x3, 0x4 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, + 0x1, 0x2, 0x3, 0x4 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x1, 0x2 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXl.inc" +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubw.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubw.c +@@ -6,45 +6,15 @@ + #define TEST_MSG "VSUBW" + + /* Expected results. 
*/ +-VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +-VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +-VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, + 0x1, 0x2, 0x3, 0x4 }; + VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; + VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +-VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; + VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfefd, 0xfefe, 0xfeff, 0xff00, + 0xff01, 0xff02, 0xff03, 0xff04 }; + VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffeffff, 0xffff0000, + 0xffff0001, 0xffff0002 }; + VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffff00000000, + 0xffffffff00000001 }; +-VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33, +- 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, +- 0x3333, 0x3333, 0x3333, 0x3333 }; +-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, +- 0x33333333, 0x33333333 }; + + #include "vXXXw.inc" --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshlu_n.c -@@ -0,0 +1,263 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c +@@ -0,0 +1,289 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + -+/* Expected values of cumulative_saturation flag with negative -+ input. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; -+ -+/* Expected results with negative input. 
*/ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x0, 0x0 }; ++/* Expected results for vtbl1. */ ++VECT_VAR_DECL(expected_vtbl1,int,8,8) [] = { 0x0, 0xf2, 0xf2, 0xf2, ++ 0x0, 0x0, 0xf2, 0xf2 }; ++VECT_VAR_DECL(expected_vtbl1,uint,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, ++ 0x0, 0x0, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbl1,poly,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, ++ 0x0, 0x0, 0xf3, 0xf3 }; ++ ++/* Expected results for vtbl2. */ ++VECT_VAR_DECL(expected_vtbl2,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, ++ 0x0, 0x0, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbl2,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0x0, 0x0, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbl2,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0x0, 0x0, 0xf5, 0xf5 }; ++ ++/* Expected results for vtbl3. */ ++VECT_VAR_DECL(expected_vtbl3,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, ++ 0xff, 0x0, 0xf4, 0xf4 }; ++VECT_VAR_DECL(expected_vtbl3,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0x0, 0xf7, 0xf7 }; ++VECT_VAR_DECL(expected_vtbl3,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0x0, 0xf7, 0xf7 }; ++ ++/* Expected results for vtbl4. */ ++VECT_VAR_DECL(expected_vtbl4,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, ++ 0x3, 0x0, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbl4,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0x0, 0xf9, 0xf9 }; ++VECT_VAR_DECL(expected_vtbl4,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0x0, 0xf9, 0xf9 }; ++ ++/* Expected results for vtbx1. */ ++VECT_VAR_DECL(expected_vtbx1,int,8,8) [] = { 0x33, 0xf2, 0xf2, 0xf2, ++ 0x33, 0x33, 0xf2, 0xf2 }; ++VECT_VAR_DECL(expected_vtbx1,uint,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, ++ 0xcc, 0xcc, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbx1,poly,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, ++ 0xcc, 0xcc, 0xf3, 0xf3 }; ++ ++/* Expected results for vtbx2. */ ++VECT_VAR_DECL(expected_vtbx2,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, ++ 0x33, 0x33, 0xf3, 0xf3 }; ++VECT_VAR_DECL(expected_vtbx2,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0xcc, 0xcc, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbx2,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, ++ 0xcc, 0xcc, 0xf5, 0xf5 }; ++ ++/* Expected results for vtbx3. */ ++VECT_VAR_DECL(expected_vtbx3,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, ++ 0xff, 0x33, 0xf4, 0xf4 }; ++VECT_VAR_DECL(expected_vtbx3,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0xcc, 0xf7, 0xf7 }; ++VECT_VAR_DECL(expected_vtbx3,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, ++ 0xff, 0xcc, 0xf7, 0xf7 }; ++ ++/* Expected results for vtbx4. */ ++VECT_VAR_DECL(expected_vtbx4,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, ++ 0x3, 0x33, 0xf5, 0xf5 }; ++VECT_VAR_DECL(expected_vtbx4,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0xcc, 0xf9, 0xf9 }; ++VECT_VAR_DECL(expected_vtbx4,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, ++ 0x3, 0xcc, 0xf9, 0xf9 }; + -+/* Expected values of cumulative_saturation flag with shift by 1. 
*/ -+int VECT_VAR(expected_cumulative_sat_sh1,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_sh1,int,64,2) = 0; ++void exec_vtbX (void) ++{ ++ int i; + -+/* Expected results with shift by 1. */ -+VECT_VAR_DECL(expected_sh1,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe }; -+VECT_VAR_DECL(expected_sh1,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; -+VECT_VAR_DECL(expected_sh1,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_sh1,uint,64,1) [] = { 0xfffffffffffffffe }; -+VECT_VAR_DECL(expected_sh1,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe, -+ 0xfe, 0xfe, 0xfe, 0xfe }; -+VECT_VAR_DECL(expected_sh1,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe, -+ 0xfffe, 0xfffe, 0xfffe, 0xfffe }; -+VECT_VAR_DECL(expected_sh1,uint,32,4) [] = { 0xfffffffe, 0xfffffffe, -+ 0xfffffffe, 0xfffffffe }; -+VECT_VAR_DECL(expected_sh1,uint,64,2) [] = { 0xfffffffffffffffe, -+ 0xfffffffffffffffe }; ++ /* In this case, input variables are arrays of vectors. */ ++#define DECL_VTBX(T1, W, N, X) \ ++ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(table_vector, T1, W, N, X) ++ ++ /* The vtbl1 variant is different from vtbl{2,3,4} because it takes a ++ vector as 1st param, instead of an array of vectors. */ ++#define TEST_VTBL1(T1, T2, T3, W, N) \ ++ VECT_VAR(table_vector, T1, W, N) = \ ++ vld1##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbl1_##T2##W(VECT_VAR(table_vector, T1, W, N), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++#define TEST_VTBLX(T1, T2, T3, W, N, X) \ ++ VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ ++ vld##X##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbl##X##_##T2##W(VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++ /* We need to define a lookup table. */ ++ uint8_t lookup_table[32]; ++ ++ DECL_VARIABLE(vector, int, 8, 8); ++ DECL_VARIABLE(vector, uint, 8, 8); ++ DECL_VARIABLE(vector, poly, 8, 8); ++ DECL_VARIABLE(vector_res, int, 8, 8); ++ DECL_VARIABLE(vector_res, uint, 8, 8); ++ DECL_VARIABLE(vector_res, poly, 8, 8); + -+/* Expected values of cumulative_saturation flag with shift by 2. */ -+int VECT_VAR(expected_cumulative_sat_sh2,int,8,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,16,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,32,2) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,64,1) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,8,16) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_sh2,int,64,2) = 1; ++ /* For vtbl1. */ ++ DECL_VARIABLE(table_vector, int, 8, 8); ++ DECL_VARIABLE(table_vector, uint, 8, 8); ++ DECL_VARIABLE(table_vector, poly, 8, 8); ++ ++ /* For vtbx*. */ ++ DECL_VARIABLE(default_vector, int, 8, 8); ++ DECL_VARIABLE(default_vector, uint, 8, 8); ++ DECL_VARIABLE(default_vector, poly, 8, 8); ++ ++ /* We need only 8 bits variants. 
*/ ++#define DECL_ALL_VTBLX(X) \ ++ DECL_VTBX(int, 8, 8, X); \ ++ DECL_VTBX(uint, 8, 8, X); \ ++ DECL_VTBX(poly, 8, 8, X) ++ ++#define TEST_ALL_VTBL1() \ ++ TEST_VTBL1(int, s, int, 8, 8); \ ++ TEST_VTBL1(uint, u, uint, 8, 8); \ ++ TEST_VTBL1(poly, p, uint, 8, 8) ++ ++#define TEST_ALL_VTBLX(X) \ ++ TEST_VTBLX(int, s, int, 8, 8, X); \ ++ TEST_VTBLX(uint, u, uint, 8, 8, X); \ ++ TEST_VTBLX(poly, p, uint, 8, 8, X) ++ ++ /* Declare the temporary buffers / variables. */ ++ DECL_ALL_VTBLX(2); ++ DECL_ALL_VTBLX(3); ++ DECL_ALL_VTBLX(4); ++ ++ /* Fill the lookup table. */ ++ for (i=0; i<32; i++) { ++ lookup_table[i] = i-15; ++ } + -+/* Expected results with shift by 2. */ -+VECT_VAR_DECL(expected_sh2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_sh2,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_sh2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_sh2,uint,64,1) [] = { 0xffffffffffffffff }; -+VECT_VAR_DECL(expected_sh2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_sh2,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_sh2,uint,32,4) [] = { 0xffffffff, 0xffffffff, -+ 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_sh2,uint,64,2) [] = { 0xffffffffffffffff, -+ 0xffffffffffffffff }; ++ /* Choose init value arbitrarily, will be used as table index. */ ++ VDUP(vector, , int, s, 8, 8, 1); ++ VDUP(vector, , uint, u, 8, 8, 2); ++ VDUP(vector, , poly, p, 8, 8, 2); + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; -+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++ /* To ensure coverage, add some indexes larger than 8,16 and 32 ++ except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */ ++ VSET_LANE(vector, , int, s, 8, 8, 0, 10); ++ VSET_LANE(vector, , int, s, 8, 8, 4, 20); ++ VSET_LANE(vector, , int, s, 8, 8, 5, 40); ++ VSET_LANE(vector, , uint, u, 8, 8, 0, 10); ++ VSET_LANE(vector, , uint, u, 8, 8, 4, 20); ++ VSET_LANE(vector, , uint, u, 8, 8, 5, 40); ++ VSET_LANE(vector, , poly, p, 8, 8, 0, 10); ++ VSET_LANE(vector, , poly, p, 8, 8, 4, 20); ++ VSET_LANE(vector, , poly, p, 8, 8, 5, 40); + -+/* Expected results. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0, -+ 0xa0, 0xa0, 0xa0, 0xa0, -+ 0xa0, 0xa0, 0xa0, 0xa0, -+ 0xa0, 0xa0, 0xa0, 0xa0 }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180, -+ 0x180, 0x180, 0x180, 0x180 }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 }; + ++ /* Check vtbl1. 
*/ ++ clean_results (); ++#define TEST_MSG "VTBL1" ++ TEST_ALL_VTBL1(); + -+#define INSN vqshlu -+#define TEST_MSG "VQSHLU_N/VQSHLUQ_N" ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl1, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl1, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl1, ""); + -+#define FNNAME1(NAME) void exec_ ## NAME ## _n(void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ /* Check vtbl2. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBL2" ++ TEST_ALL_VTBLX(2); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl2, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl2, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl2, ""); + -+FNNAME (INSN) -+{ -+ /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ -+#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \ -+ VECT_VAR(vector_res, T3, W, N) = \ -+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ -+ VECT_VAR(vector_res, T3, W, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Check vtbl3. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBL3" ++ TEST_ALL_VTBLX(3); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl3, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl3, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl3, ""); + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Check vtbl4. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBL4" ++ TEST_ALL_VTBLX(4); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl4, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl4, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl4, ""); ++ ++ ++ /* Now test VTBX. */ ++ ++ /* The vtbx1 variant is different from vtbx{2,3,4} because it takes a ++ vector as 1st param, instead of an array of vectors. */ ++#define TEST_VTBX1(T1, T2, T3, W, N) \ ++ VECT_VAR(table_vector, T1, W, N) = \ ++ vld1##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbx1_##T2##W(VECT_VAR(default_vector, T1, W, N), \ ++ VECT_VAR(table_vector, T1, W, N), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++#define TEST_VTBXX(T1, T2, T3, W, N, X) \ ++ VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ ++ vld##X##_##T2##W((T1##W##_t *)lookup_table); \ ++ \ ++ VECT_VAR(vector_res, T1, W, N) = \ ++ vtbx##X##_##T2##W(VECT_VAR(default_vector, T1, W, N), \ ++ VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ ++ VECT_VAR(vector, T3, W, N)); \ ++ vst1_##T2##W(VECT_VAR(result, T1, W, N), \ ++ VECT_VAR(vector_res, T1, W, N)); ++ ++#define TEST_ALL_VTBX1() \ ++ TEST_VTBX1(int, s, int, 8, 8); \ ++ TEST_VTBX1(uint, u, uint, 8, 8); \ ++ TEST_VTBX1(poly, p, uint, 8, 8) ++ ++#define TEST_ALL_VTBXX(X) \ ++ TEST_VTBXX(int, s, int, 8, 8, X); \ ++ TEST_VTBXX(uint, u, uint, 8, 8, X); \ ++ TEST_VTBXX(poly, p, uint, 8, 8, X) ++ ++ /* Choose init value arbitrarily, will be used as default value. 
*/ ++ VDUP(default_vector, , int, s, 8, 8, 0x33); ++ VDUP(default_vector, , uint, u, 8, 8, 0xCC); ++ VDUP(default_vector, , poly, p, 8, 8, 0xCC); + -+#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++ /* Check vtbx1. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX1" ++ TEST_ALL_VTBX1(); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx1, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx1, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx1, ""); + ++ /* Check vtbx2. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX2" ++ TEST_ALL_VTBXX(2); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx2, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx2, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx2, ""); + -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); ++ /* Check vtbx3. */ ++ clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX3" ++ TEST_ALL_VTBXX(3); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx3, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx3, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx3, ""); + ++ /* Check vtbx4. */ + clean_results (); ++#undef TEST_MSG ++#define TEST_MSG "VTBX4" ++ TEST_ALL_VTBXX(4); ++ ++ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx4, ""); ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx4, ""); ++ CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx4, ""); ++} + -+ /* Fill input vector with negative values, to check saturation on -+ limits. */ -+ VDUP(vector, , int, s, 8, 8, -1); -+ VDUP(vector, , int, s, 16, 4, -2); -+ VDUP(vector, , int, s, 32, 2, -3); -+ VDUP(vector, , int, s, 64, 1, -4); -+ VDUP(vector, q, int, s, 8, 16, -1); -+ VDUP(vector, q, int, s, 16, 8, -2); -+ VDUP(vector, q, int, s, 32, 4, -3); -+ VDUP(vector, q, int, s, 64, 2, -4); ++int main (void) ++{ ++ exec_vtbX (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c +@@ -8,12 +8,10 @@ VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf1, 0x11, 0x11, + 0xf2, 0xf3, 0x11, 0x11 }; + VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff1, 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +@@ -25,8 +23,6 @@ VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, + VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff1, 0x22, 0x22, + 0xfff2, 0xfff3, 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 }; +-VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55, + 0xf4, 0xf5, 0x55, 0x55, +@@ -34,8 +30,6 @@ VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, + VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 
0xfff1, 0x66, 0x66, + 0xfff2, 0xfff3, 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 }; +-VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, + 0xf2, 0xf3, 0x55, 0x55, + 0xf4, 0xf5, 0x55, 0x55, +@@ -50,12 +44,10 @@ VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf4, 0xf5, 0x11, 0x11, + 0xf6, 0xf7, 0x11, 0x11 }; + VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff2, 0xfff3, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, + 0xf6, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, + 0xf6, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +@@ -67,8 +59,6 @@ VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, + VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff4, 0xfff5, 0x22, 0x22, + 0xfff6, 0xfff7, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, + 0xfa, 0xfb, 0x55, 0x55, + 0xfc, 0xfd, 0x55, 0x55, +@@ -76,8 +66,6 @@ VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, + VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, + 0xfff6, 0xfff7, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, + 0xfa, 0xfb, 0x55, 0x55, + 0xfc, 0xfd, 0x55, 0x55, +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtst.c +@@ -0,0 +1,120 @@ ++#include ++#include "arm-neon-ref.h" ++#include "compute-ref-data.h" + -+ /* Choose shift amount arbitrarily. */ -+#define CMT " (negative input)" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_neg, CMT); ++/* Expected results with signed input. 
*/ ++VECT_VAR_DECL(expected_signed,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_signed,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; ++VECT_VAR_DECL(expected_signed,uint,32,2) [] = { 0x0, 0xffffffff }; ++VECT_VAR_DECL(expected_signed,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_signed,uint,16,8) [] = { 0x0, 0xffff, 0x0, 0xffff, ++ 0xffff, 0xffff, 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_signed,uint,32,4) [] = { 0x0, 0xffffffff, ++ 0x0, 0xffffffff }; + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT); ++/* Expected results with unsigned input. */ ++VECT_VAR_DECL(expected_unsigned,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_unsigned,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; ++VECT_VAR_DECL(expected_unsigned,uint,32,2) [] = { 0x0, 0xffffffff }; ++VECT_VAR_DECL(expected_unsigned,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff, ++ 0xff, 0xff, 0xff, 0xff }; ++VECT_VAR_DECL(expected_unsigned,uint,16,8) [] = { 0x0, 0xffff, ++ 0x0, 0xffff, ++ 0xffff, 0xffff, ++ 0xffff, 0xffff }; ++VECT_VAR_DECL(expected_unsigned,uint,32,4) [] = { 0x0, 0xffffffff, ++ 0x0, 0xffffffff }; + -+ -+ /* Fill input vector with max value, to check saturation on -+ limits. */ -+ VDUP(vector, , int, s, 8, 8, 0x7F); -+ VDUP(vector, , int, s, 16, 4, 0x7FFF); -+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); -+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, int, s, 8, 16, 0x7F); -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); ++#ifndef INSN_NAME ++#define INSN_NAME vtst ++#define TEST_MSG "VTST/VTSTQ" ++#endif + -+ /* shift by 1. */ -+#undef CMT -+#define CMT " (shift by 1)" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_sh1, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1, expected_cumulative_sat_sh1, CMT); ++/* We can't use the standard ref_v_binary_op.c template because vtst ++ has no 64 bits variant, and outputs are always of uint type. 
*/ ++#define FNNAME1(NAME) void exec_ ## NAME (void) ++#define FNNAME(NAME) FNNAME1(NAME) + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh1, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh1, CMT); ++FNNAME (INSN_NAME) ++{ ++ /* Basic test: y=OP(x,x), then store the result. */ ++#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ ++ VECT_VAR(vector_res, uint, W, N) = \ ++ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ ++ VECT_VAR(vector2, T1, W, N)); \ ++ vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \ ++ VECT_VAR(vector_res, uint, W, N)) + -+ /* shift by 2 to force saturation. */ -+#undef CMT -+#define CMT " (shift by 2)" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2, expected_cumulative_sat_sh2, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_sh2, CMT); ++#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ ++ TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh2, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh2, CMT); ++ DECL_VARIABLE_ALL_VARIANTS(vector); ++ DECL_VARIABLE_ALL_VARIANTS(vector2); ++ DECL_VARIABLE_UNSIGNED_VARIANTS(vector_res); + -+ -+ /* Fill input vector with positive values, to check normal case. */ -+ VDUP(vector, , int, s, 8, 8, 1); -+ VDUP(vector, , int, s, 16, 4, 2); -+ VDUP(vector, , int, s, 32, 2, 3); -+ VDUP(vector, , int, s, 64, 1, 4); -+ VDUP(vector, q, int, s, 8, 16, 5); -+ VDUP(vector, q, int, s, 16, 8, 6); -+ VDUP(vector, q, int, s, 32, 4, 7); -+ VDUP(vector, q, int, s, 64, 2, 8); + -+ /* Arbitrary shift amount. */ ++ clean_results (); ++ ++ /* Initialize input "vector" from "buffer". */ ++ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); ++ ++ /* Choose init value arbitrarily, will be used as comparison ++ value. 
*/ ++ VDUP(vector2, , int, s, 8, 8, 15); ++ VDUP(vector2, , int, s, 16, 4, 5); ++ VDUP(vector2, , int, s, 32, 2, 1); ++ VDUP(vector2, , uint, u, 8, 8, 15); ++ VDUP(vector2, , uint, u, 16, 4, 5); ++ VDUP(vector2, , uint, u, 32, 2, 1); ++ VDUP(vector2, q, int, s, 8, 16, 15); ++ VDUP(vector2, q, int, s, 16, 8, 5); ++ VDUP(vector2, q, int, s, 32, 4, 1); ++ VDUP(vector2, q, uint, u, 8, 16, 15); ++ VDUP(vector2, q, uint, u, 16, 8, 5); ++ VDUP(vector2, q, uint, u, 32, 4, 1); ++ ++#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR, T1, T2) \ ++ MACRO(VAR, , T1, T2, 8, 8); \ ++ MACRO(VAR, , T1, T2, 16, 4); \ ++ MACRO(VAR, , T1, T2, 32, 2); \ ++ MACRO(VAR, q, T1, T2, 8, 16); \ ++ MACRO(VAR, q, T1, T2, 16, 8); \ ++ MACRO(VAR, q, T1, T2, 32, 4) ++ ++ /* Split the test, as both signed and unsigned variants output their ++ result in an unsigned form (thus the same output variable is used ++ in these tests). */ ++ TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, int, s); ++ ++#define CMT " (signed input)" ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_signed, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_signed, CMT); ++ ++ TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, uint, u); ++ +#undef CMT -+#define CMT "" -+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7, expected_cumulative_sat, CMT); -+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8, expected_cumulative_sat, CMT); -+ -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT); ++#define CMT " (unsigned input)" ++ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_unsigned, CMT); ++ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_unsigned, CMT); +} + +int main (void) +{ -+ exec_vqshlu_n (); ++ exec_vtst (); + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c +@@ -9,14 +9,12 @@ VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf1, 
0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff3 }; + VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, + 0xfffffff1 }; +-VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; + VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, +@@ -32,8 +30,6 @@ VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff1, + 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -44,8 +40,6 @@ VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff1, + 0xfff6, 0xfff7 }; + VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +-VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, +@@ -62,12 +56,10 @@ VECT_VAR_DECL(expected1,int,8,8) [] = { 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11 }; + VECT_VAR_DECL(expected1,int,16,4) [] = { 0x22, 0x22, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,8) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,uint,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +@@ -79,8 +71,6 @@ VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, + VECT_VAR_DECL(expected1,int,16,8) [] = { 0x22, 0x22, 0x22, 0x22, + 0x22, 0x22, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,4) [] = { 0x33, 0x33, 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, +@@ -88,8 +78,6 @@ VECT_VAR_DECL(expected1,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55, + VECT_VAR_DECL(expected1,uint,16,8) [] = { 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,4) [] = { 0x77, 0x77, 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, +--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c +@@ -9,13 +9,11 @@ VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf4, 0x11, 0x11, + VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff2, + 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +-VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, + 0xf1, 0xf5, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff2, + 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 
0xfffffff1 }; +-VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, + 0xf1, 0xf5, 0x55, 0x55 }; + VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2, +@@ -29,8 +27,6 @@ VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff4, 0x22, 0x22, + 0xfff1, 0xfff5, 0x22, 0x22 }; + VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff2, + 0x33, 0x33 }; +-VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, + 0xf1, 0xf9, 0x55, 0x55, + 0xf2, 0xfa, 0x55, 0x55, +@@ -39,8 +35,6 @@ VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, + 0xfff1, 0xfff5, 0x66, 0x66 }; + VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff2, + 0x77, 0x77 }; +-VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, + 0xf1, 0xf9, 0x55, 0x55, + 0xf2, 0xfa, 0x55, 0x55, +@@ -56,13 +50,11 @@ VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf2, 0xf6, 0x11, 0x11, + VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff3, + 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, + 0xf3, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff3, + 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, + 0xf3, 0xf7, 0x55, 0x55 }; + VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3, +@@ -76,8 +68,6 @@ VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff2, 0xfff6, 0x22, 0x22, + 0xfff3, 0xfff7, 0x22, 0x22 }; + VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff3, + 0x33, 0x33 }; +-VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, + 0xf5, 0xfd, 0x55, 0x55, + 0xf6, 0xfe, 0x55, 0x55, +@@ -86,8 +76,6 @@ VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, + 0xfff3, 0xfff7, 0x66, 0x66 }; + VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff3, + 0x77, 0x77 }; +-VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333, +- 0x3333333333333333 }; + VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, + 0xf5, 0xfd, 0x55, 0x55, + 0xf6, 0xfe, 0x55, 0x55, --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_n.c -@@ -0,0 +1,177 @@ -+#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" ++++ b/src/gcc/testsuite/gcc.target/aarch64/arm_align_max_pwr.c +@@ -0,0 +1,15 @@ ++/* { dg-do run } */ + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; ++#include ++#include + -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, -+ 0xfa, 0xfa, 0xfb, 0xfb }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++#define align (1ul << __ARM_ALIGN_MAX_PWR) ++static int x __attribute__ ((aligned (align))); + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by 3. */ -+int VECT_VAR(expected_cumulative_sat_max_sh3,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,int,64,2) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,64,2) = 1; ++int ++main () ++{ ++ assert ((((unsigned long)&x) & (align - 1)) == 0); + -+/* Expected results with max input value shifted by 3. */ -+VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/arm_align_max_stack_pwr.c +@@ -0,0 +1,15 @@ ++/* { dg-do run } */ + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by type size. */ -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,32,4) = 0; -+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,64,2) = 0; ++#include ++#include + -+/* Expected results with max input value shifted by type size. 
*/ -+VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, -+ 0x7f, 0x7f, 0x7f, 0x7f }; -+VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff }; ++#define align (1ul << __ARM_ALIGN_MAX_STACK_PWR) + -+#define INSN vqshrn_n -+#define TEST_MSG "VQSHRN_N" ++int ++main () ++{ ++ int x __attribute__ ((aligned (align))); + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++ assert ((((unsigned long)&x) & (align - 1)) == 0); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-cas.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+FNNAME (INSN) -+{ -+ /* Basic test: y=vqshrn_n(x,v), then store the result. */ -+#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ -+ VECT_VAR(vector_res, T1, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ -+ VECT_VAR(vector_res, T1, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Test ARMv8.1-A CAS instruction. */ + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++#include "atomic-inst-ops.inc" + -+#define TEST_VQSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++#define TEST TEST_TWO + ++#define CAS_ATOMIC(FN, TY, MODEL1, MODEL2) \ ++ int FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \ ++ { \ ++ int model_s = MODEL1; \ ++ int model_f = MODEL2; \ ++ /* The success memory ordering must be at least as strong as \ ++ the failure memory ordering. */ \ ++ if (model_s < model_f) \ ++ return 0; \ ++ /* Ignore invalid memory orderings. */ \ ++ if (model_f == __ATOMIC_RELEASE || model_f == __ATOMIC_ACQ_REL) \ ++ return 0; \ ++ return __atomic_compare_exchange_n (val, foo, bar, 0, model_s, model_f); \ ++ } + -+ /* vector is twice as large as vector_res. */ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); -+ DECL_VARIABLE(vector, uint, 16, 8); -+ DECL_VARIABLE(vector, uint, 32, 4); -+ DECL_VARIABLE(vector, uint, 64, 2); ++#define CAS_ATOMIC_NORETURN(FN, TY, MODEL1, MODEL2) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \ ++ { \ ++ int model_s = MODEL1; \ ++ int model_f = MODEL2; \ ++ /* The success memory ordering must be at least as strong as \ ++ the failure memory ordering. */ \ ++ if (model_s < model_f) \ ++ return; \ ++ /* Ignore invalid memory orderings. 
*/ \ ++ if (model_f == __ATOMIC_RELEASE || model_f == __ATOMIC_ACQ_REL) \ ++ return; \ ++ __atomic_compare_exchange_n (val, foo, bar, 0, model_s, model_f); \ ++ } + -+ DECL_VARIABLE(vector_res, int, 8, 8); -+ DECL_VARIABLE(vector_res, int, 16, 4); -+ DECL_VARIABLE(vector_res, int, 32, 2); -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++TEST (cas_atomic, CAS_ATOMIC) ++TEST (cas_atomic_noreturn, CAS_ATOMIC_NORETURN) + -+ clean_results (); + -+ VLOAD(vector, buffer, q, int, s, 16, 8); -+ VLOAD(vector, buffer, q, int, s, 32, 4); -+ VLOAD(vector, buffer, q, int, s, 64, 2); -+ VLOAD(vector, buffer, q, uint, u, 16, 8); -+ VLOAD(vector, buffer, q, uint, u, 32, 4); -+ VLOAD(vector, buffer, q, uint, u, 64, 2); ++/* { dg-final { scan-assembler-times "casb\t" 4} } */ ++/* { dg-final { scan-assembler-times "casab\t" 20} } */ ++/* { dg-final { scan-assembler-times "caslb\t" 4} } */ ++/* { dg-final { scan-assembler-times "casalb\t" 36} } */ ++ ++/* { dg-final { scan-assembler-times "cash\t" 4} } */ ++/* { dg-final { scan-assembler-times "casah\t" 20} } */ ++/* { dg-final { scan-assembler-times "caslh\t" 4} } */ ++/* { dg-final { scan-assembler-times "casalh\t" 36} } */ ++ ++/* { dg-final { scan-assembler-times "cas\t" 8} } */ ++/* { dg-final { scan-assembler-times "casa\t" 40} } */ ++/* { dg-final { scan-assembler-times "casl\t" 8} } */ ++/* { dg-final { scan-assembler-times "casal\t" 72} } */ ++ ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c +@@ -0,0 +1,87 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+ /* Choose shift amount arbitrarily. */ -+#define CMT "" -+ TEST_VQSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT); -+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT); ++/* Test ARMv8.1-A Load-ADD instruction. */ + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); ++#include "atomic-inst-ops.inc" + ++#define TEST TEST_ONE + -+ /* Use max possible value as input. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); -+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF); -+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); -+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); ++#define LOAD_ADD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_add (val, foo, MODEL); \ ++ } + -+#undef CMT -+#define CMT " (check saturation: shift by 3)" -+ TEST_VQSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT); -+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT); ++#define LOAD_ADD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_add (val, foo, MODEL); \ ++ } + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT); ++#define LOAD_SUB(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_sub (val, foo, MODEL); \ ++ } + ++#define LOAD_SUB_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_sub (val, foo, MODEL); \ ++ } + -+#undef CMT -+#define CMT " (check saturation: shift by max)" -+ TEST_VQSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT); -+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT); ++#define ADD_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_add_fetch (val, foo, MODEL); \ ++ } + -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT); -+} ++#define ADD_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_add_fetch (val, foo, MODEL); \ ++ } + -+int main (void) -+{ -+ exec_vqshrn_n (); -+ return 0; -+} ++#define SUB_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_sub_fetch (val, foo, MODEL); \ ++ } ++ ++#define SUB_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_sub_fetch (val, foo, MODEL); \ ++ } ++ ++TEST (load_add, LOAD_ADD) ++TEST (load_add_notreturn, LOAD_ADD_NORETURN) ++ ++TEST (load_sub, LOAD_SUB) ++TEST (load_sub_notreturn, LOAD_SUB_NORETURN) ++ ++TEST (add_load, ADD_LOAD) 
++TEST (add_load_notreturn, ADD_LOAD_NORETURN) ++ ++TEST (sub_load, SUB_LOAD) ++TEST (sub_load_notreturn, SUB_LOAD_NORETURN) ++ ++/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddab\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */ ++ ++/* { dg-final { scan-assembler-times "ldaddh\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddah\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */ ++ ++/* { dg-final { scan-assembler-times "ldadd\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldadda\t" 64} } */ ++/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldaddal\t" 64} } */ ++ ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_n.c -@@ -0,0 +1,133 @@ -+#include -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c +@@ -0,0 +1,155 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+/* Expected values of cumulative_saturation flag with negative input. */ -+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1; ++/* Test ARMv8.1-A LD instruction. */ ++ ++#include "atomic-inst-ops.inc" ++ ++#define TEST TEST_ONE ++ ++#define LOAD_OR(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_or (val, foo, MODEL); \ ++ } ++ ++#define LOAD_OR_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_or (val, foo, MODEL); \ ++ } ++ ++#define LOAD_AND(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_and (val, foo, MODEL); \ ++ } ++ ++#define LOAD_AND_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_and (val, foo, MODEL); \ ++ } ++ ++#define LOAD_XOR(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_fetch_xor (val, foo, MODEL); \ ++ } ++ ++#define LOAD_XOR_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_fetch_xor (val, foo, MODEL); \ ++ } ++ ++#define OR_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_or_fetch (val, foo, MODEL); \ ++ } ++ ++#define OR_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_or_fetch (val, foo, MODEL); \ ++ } ++ ++#define AND_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_and_fetch (val, foo, MODEL); \ ++ } ++ ++#define AND_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_and_fetch (val, foo, MODEL); \ ++ } ++ ++#define XOR_LOAD(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ return __atomic_xor_fetch (val, foo, MODEL); \ ++ } ++ ++#define XOR_LOAD_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo) \ ++ { \ ++ __atomic_xor_fetch (val, foo, MODEL); \ ++ } ++ ++ ++TEST (load_or, LOAD_OR) ++TEST (load_or_notreturn, LOAD_OR_NORETURN) ++ ++TEST 
(load_and, LOAD_AND) ++TEST (load_and_notreturn, LOAD_AND_NORETURN) ++ ++TEST (load_xor, LOAD_XOR) ++TEST (load_xor_notreturn, LOAD_XOR_NORETURN) ++ ++TEST (or_load, OR_LOAD) ++TEST (or_load_notreturn, OR_LOAD_NORETURN) ++ ++TEST (and_load, AND_LOAD) ++TEST (and_load_notreturn, AND_LOAD_NORETURN) ++ ++TEST (xor_load, XOR_LOAD) ++TEST (xor_load_notreturn, XOR_LOAD_NORETURN) + -+/* Expected results with negative input. */ -+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 }; + -+/* Expected values of cumulative_saturation flag with max input value -+ shifted by 1. */ -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1; ++/* Load-OR. */ + -+/* Expected results with max input value shifted by 1. */ -+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff }; -+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 }; ++/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetab\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */ + -+/* Expected values of cumulative_saturation flag. */ -+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; -+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; -+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; ++/* { dg-final { scan-assembler-times "ldseth\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetah\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */ + -+/* Expected results. */ -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48, -+ 0x48, 0x48, 0x48, 0x48 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe }; ++/* { dg-final { scan-assembler-times "ldset\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldseta\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldsetal\t" 32} } */ + ++/* Load-AND. */ + -+#define INSN vqshrun_n -+#define TEST_MSG "VQSHRUN_N" ++/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclrab\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclralb\t" 16} } */ + -+#define FNNAME1(NAME) void exec_ ## NAME (void) -+#define FNNAME(NAME) FNNAME1(NAME) ++/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclrah\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldclralh\t" 16} } */ + -+FNNAME (INSN) -+{ -+ /* Basic test: y=vqshrun_n(x,v), then store the result. 
*/ -+#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ -+ VECT_VAR(vector_res, uint, W2, N) = \ -+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ V); \ -+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \ -+ VECT_VAR(vector_res, uint, W2, N)); \ -+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-final { scan-assembler-times "ldclr\t" 16} */ ++/* { dg-final { scan-assembler-times "ldclra\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldclral\t" 32} } */ + -+ /* Two auxliary macros are necessary to expand INSN */ -+#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* Load-XOR. */ + -+#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ -+ TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) ++/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeorab\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */ + ++/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeorah\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */ ++/* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */ + -+ /* vector is twice as large as vector_res. */ -+ DECL_VARIABLE(vector, int, 16, 8); -+ DECL_VARIABLE(vector, int, 32, 4); -+ DECL_VARIABLE(vector, int, 64, 2); ++/* { dg-final { scan-assembler-times "ldeor\t" 16} */ ++/* { dg-final { scan-assembler-times "ldeora\t" 32} } */ ++/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */ ++/* { dg-final { scan-assembler-times "ldeoral\t" 32} } */ + -+ DECL_VARIABLE(vector_res, uint, 8, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 32, 2); ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-ops.inc +@@ -0,0 +1,66 @@ ++/* Support code for atomic instruction tests. */ ++ ++/* Define types names without spaces. */ ++typedef unsigned char uchar; ++typedef unsigned short ushort; ++typedef unsigned int uint; ++typedef long long longlong; ++typedef unsigned long long ulonglong; ++typedef __int128_t int128; ++typedef __uint128_t uint128; ++ ++#define FNNAME(NAME,TY) NAME ++ ++/* Expand one-model functions. */ ++#define TEST_M1(NAME, FN, TY, MODEL, DUMMY) \ ++ FN (test_##NAME##_##TY, TY, MODEL) ++ ++/* Expand two-model functions. */ ++#define TEST_M2(NAME, FN, TY, MODEL1, MODEL2) \ ++ FN (test_##NAME##_##TY, TY, MODEL1, MODEL2) ++ ++/* Typest to test. */ ++#define TEST_TY(NAME, FN, N, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, char, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, uchar, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, short, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, ushort, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, int, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, uint, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, longlong, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, ulonglong, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, int128, MODEL1, MODEL2) \ ++ TEST_M##N (NAME, FN, uint128, MODEL1, MODEL2) ++ ++/* Models to test. 
*/ ++#define TEST_MODEL(NAME, FN, N) \ ++ TEST_TY (NAME##_relaxed, FN, N, __ATOMIC_RELAXED, DUMMY) \ ++ TEST_TY (NAME##_consume, FN, N, __ATOMIC_CONSUME, DUMMY) \ ++ TEST_TY (NAME##_acquire, FN, N, __ATOMIC_ACQUIRE, DUMMY) \ ++ TEST_TY (NAME##_release, FN, N, __ATOMIC_RELEASE, DUMMY) \ ++ TEST_TY (NAME##_acq_rel, FN, N, __ATOMIC_ACQ_REL, DUMMY) \ ++ TEST_TY (NAME##_seq_cst, FN, N, __ATOMIC_SEQ_CST, DUMMY) \ ++ ++/* Cross-product of models to test. */ ++#define TEST_MODEL_M1(NAME, FN, N, M) \ ++ TEST_TY (NAME##_relaxed, FN, N, M, __ATOMIC_RELAXED) \ ++ TEST_TY (NAME##_consume, FN, N, M, __ATOMIC_CONSUME) \ ++ TEST_TY (NAME##_acquire, FN, N, M, __ATOMIC_ACQUIRE) \ ++ TEST_TY (NAME##_release, FN, N, M, __ATOMIC_RELEASE) \ ++ TEST_TY (NAME##_acq_rel, FN, N, M, __ATOMIC_ACQ_REL) \ ++ TEST_TY (NAME##_seq_cst, FN, N, M, __ATOMIC_SEQ_CST) \ ++ ++#define TEST_MODEL_M2(NAME, FN) \ ++ TEST_MODEL_M1 (NAME##_relaxed, FN, 2, __ATOMIC_RELAXED) \ ++ TEST_MODEL_M1 (NAME##_consume, FN, 2, __ATOMIC_CONSUME) \ ++ TEST_MODEL_M1 (NAME##_acquire, FN, 2, __ATOMIC_ACQUIRE) \ ++ TEST_MODEL_M1 (NAME##_release, FN, 2, __ATOMIC_RELEASE) \ ++ TEST_MODEL_M1 (NAME##_acq_rel, FN, 2, __ATOMIC_ACQ_REL) \ ++ TEST_MODEL_M1 (NAME##_seq_cst, FN, 2, __ATOMIC_SEQ_CST) \ + -+ clean_results (); ++/* Expand functions for a cross-product of memory models and types. */ ++#define TEST_TWO(NAME, FN) TEST_MODEL_M2 (NAME, FN) + -+ /* Fill input vector with negative values, to check saturation on -+ limits. */ -+ VDUP(vector, q, int, s, 16, 8, -2); -+ VDUP(vector, q, int, s, 32, 4, -3); -+ VDUP(vector, q, int, s, 64, 2, -4); ++/* Expand functions for a set of memory models and types. */ ++#define TEST_ONE(NAME, FN) TEST_MODEL (NAME, FN, 1) + -+ /* Choose shift amount arbitrarily. */ -+#define CMT " (negative input)" -+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT); -+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT); +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-inst-swp.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */ + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT); ++/* Test ARMv8.1-A SWP instruction. */ + -+ -+ /* Fill input vector with max value, to check saturation on -+ limits. */ -+ VDUP(vector, q, int, s, 16, 8, 0x7FFF); -+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); -+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); ++#include "atomic-inst-ops.inc" + -+#undef CMT -+#define CMT " (check cumulative saturation)" -+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT); -+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT); ++#define TEST TEST_ONE + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT); ++#define SWAP_ATOMIC(FN, TY, MODEL) \ ++ TY FNNAME (FN, TY) (TY* val, TY foo) \ ++ { \ ++ return __atomic_exchange_n (val, foo, MODEL); \ ++ } + -+ -+ /* Fill input vector with positive values, to check normal case. 
*/ -+ VDUP(vector, q, int, s, 16, 8, 0x1234); -+ VDUP(vector, q, int, s, 32, 4, 0x87654321); -+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); ++#define SWAP_ATOMIC_NORETURN(FN, TY, MODEL) \ ++ void FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \ ++ { \ ++ __atomic_exchange (val, foo, bar, MODEL); \ ++ } + -+#undef CMT -+#define CMT "" -+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT); -+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT); -+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT); + -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); -+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); -+} ++TEST (swap_atomic, SWAP_ATOMIC) ++TEST (swap_atomic_noreturn, SWAP_ATOMIC_NORETURN) + -+int main (void) -+{ -+ exec_vqshrun_n (); -+ return 0; -+} ++ ++/* { dg-final { scan-assembler-times "swpb\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpab\t" 8} } */ ++/* { dg-final { scan-assembler-times "swplb\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpalb\t" 8} } */ ++ ++/* { dg-final { scan-assembler-times "swph\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpah\t" 8} } */ ++/* { dg-final { scan-assembler-times "swplh\t" 4} } */ ++/* { dg-final { scan-assembler-times "swpalh\t" 8} } */ ++ ++/* { dg-final { scan-assembler-times "swp\t" 8} } */ ++/* { dg-final { scan-assembler-times "swpa\t" 16} } */ ++/* { dg-final { scan-assembler-times "swpl\t" 8} } */ ++/* { dg-final { scan-assembler-times "swpal\t" 16} } */ ++ ++/* { dg-final { scan-assembler-not "ldaxr\t" } } */ ++/* { dg-final { scan-assembler-not "stlxr\t" } } */ ++/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/aarch64/c-output-template-4.c @@ -0,0 +1,10 @@ @@ -11272,47 +30700,199 @@ + +/* { dg-final { scan-assembler "@ test\\+4" } } */ --- a/src//dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/pow-sqrt-synth-1.c -@@ -0,0 +1,38 @@ ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-mem.c +@@ -0,0 +1,10 @@ +/* { dg-do compile } */ -+/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ ++/* { dg-options "-O2" } */ + ++void ++foo (double *output) ++{ ++ *output = 0.0; ++} + -+double -+foo (double a) ++/* { dg-final { scan-assembler "str\\txzr, \\\[x0\\\]" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-reg.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void bar (double); ++void ++foo (void) +{ -+ return __builtin_pow (a, -5.875); ++ bar (0.0); +} + -+double -+foof (double a) ++/* { dg-final { scan-assembler "fmov\\td0, xzr" } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero.c ++++ b/src//dev/null +@@ -1,10 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O2" } */ +- +-void +-foo (double *output) +-{ +- *output = 0.0; +-} +- +-/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, xzr" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-mem.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void ++foo (float *output) +{ -+ return __builtin_pow (a, 0.75f); ++ *output = 0.0; +} + -+double -+bar (double a) ++/* { dg-final { scan-assembler "str\\twzr, \\\[x0\\\]" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-reg.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void bar (float); ++void ++foo (void) +{ -+ return __builtin_pow (a, 
1.0 + 0.00390625); ++ bar (0.0); +} + -+double -+baz (double a) ++/* { dg-final { scan-assembler "fmov\\ts0, wzr" } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero.c ++++ b/src//dev/null +@@ -1,10 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O2" } */ +- +-void +-foo (float *output) +-{ +- *output = 0.0; +-} +- +-/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, wzr" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovld-zero-mem.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void ++foo (long double *output) +{ -+ return __builtin_pow (a, -1.25) + __builtin_pow (a, 5.75) - __builtin_pow (a, 3.375); ++ *output = 0.0; +} + -+#define N 256 ++/* { dg-final { scan-assembler "stp\\txzr, xzr, \\\[x0\\\]" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovld-zero-reg.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++void bar (long double); +void -+vecfoo (double *a) ++foo (void) +{ -+ for (int i = 0; i < N; i++) -+ a[i] = __builtin_pow (a[i], 1.25); ++ bar (0.0); +} + -+/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ -+/* { dg-final { cleanup-tree-dump "sincos" } } */ -\ No newline at end of file ++/* { dg-final { scan-assembler "movi\\tv0\.2d, #0" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/mgeneral-regs_1.c +@@ -0,0 +1,10 @@ ++/* { dg-options "-mgeneral-regs-only" } */ ++ ++typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); ++ ++/* { dg-error "'-mgeneral-regs-only' is incompatible with vector return type" "" {target "aarch64*-*-*"} 7 } */ ++/* { dg-error "'-mgeneral-regs-only' is incompatible with vector argument" "" {target "aarch64*-*-*"} 7 } */ ++int32x2_t test (int32x2_t a, int32x2_t b) ++{ ++ return a + b; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/mgeneral-regs_2.c +@@ -0,0 +1,15 @@ ++/* { dg-options "-mgeneral-regs-only" } */ ++ ++#include ++ ++typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); ++ ++int ++test (int i, ...) ++{ ++ va_list argp; ++ va_start (argp, i); ++ int32x2_t x = (int32x2_t) {0, 1}; ++ x += va_arg (argp, int32x2_t); /* { dg-error "'-mgeneral-regs-only' is incompatible with vector varargs" } */ ++ return x[0] + x[1]; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/mgeneral-regs_3.c +@@ -0,0 +1,11 @@ ++/* { dg-options "-mgeneral-regs-only -O2" } */ ++ ++extern void abort (void); ++ ++int ++test (int i, ...) ++{ ++ float f = (float) i; /* { dg-error "'-mgeneral-regs-only' is incompatible with floating-point code" } */ ++ if (f != f) abort (); ++ return 2; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/neg_abs_1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-save-temps -O2" } */ ++ ++int ++f1 (int x) ++{ ++ return x < 0 ? x : -x; ++} ++ ++long long ++f2 (long long x) ++{ ++ return x < 0 ? x : -x; ++} ++ ++/* { dg-final { scan-assembler-not "\tneg\tw\[0-9\]*.*" } } */ ++/* { dg-final { scan-assembler-not "\tneg\tx\[0-9\]*.*" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/aarch64/nofp_1.c +@@ -0,0 +1,19 @@ ++/* { dg-skip-if "conflicting -march" { *-*-* } { "-march=*" } { "-march=*+nofp" } } */ ++/* If there are multiple -march's, the latest wins; skip the test either way. ++ -march overrides -mcpu, so there is no possibility of conflict. 
*/ ++ ++/* { dg-options "-march=armv8-a+nofp" } */ ++ ++#include ++ ++typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); ++ ++int test (int i, ...); ++ ++int ++main (int argc, char **argv) ++{ ++ int32x2_t a = (int32x2_t) {0, 1}; ++ int32x2_t b = (int32x2_t) {2, 3}; ++ return test (2, a, b); /* { dg-error "'\\+nofp' feature modifier is incompatible with vector argument" } */ ++} --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/aarch64/pr65491_1.c @@ -0,0 +1,11 @@ @@ -11398,6 +30978,61 @@ + -fomit-frame-pointer which avoids use of stp in the prologue to main(). */ +/* { dg-final { scan-assembler-not "stp\\t" } } */ +/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c +@@ -40,6 +40,7 @@ THING (int16x8_t, 8, int16_t, q_s16) \ + THING (uint16x8_t, 8, uint16_t, q_u16) \ + THING (int32x4_t, 4, int32_t, q_s32) \ + THING (uint32x4_t, 4, uint32_t, q_u32) \ ++THING (float32x4_t, 4, float32_t, q_f32)\ + THING (int64x2_t, 2, int64_t, q_s64) \ + THING (uint64x2_t, 2, uint64_t, q_u64) \ + THING (float64x2_t, 2, float64_t, q_f64) +--- a/src/gcc/testsuite/gcc.target/aarch64/vld1_lane.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/vld1_lane.c +@@ -56,7 +56,7 @@ VARIANTS (TESTMETH) + + #define CHECK(BASE, Q, ELTS, SUFFIX, LANE) \ + if (test_vld1##Q##_lane##SUFFIX ((const BASE##_t *)orig_data, \ +- BASE##_data) != 0) \ ++ & BASE##_data) != 0) \ + abort (); + + int +@@ -65,20 +65,19 @@ main (int argc, char **argv) + /* Original data for all vector formats. */ + uint64_t orig_data[2] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL}; + +- /* Data with which vldN_lane will overwrite some of previous. */ +- uint8_t uint8_data[4] = { 7, 11, 13, 17 }; +- uint16_t uint16_data[4] = { 257, 263, 269, 271 }; +- uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 }; +- uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL, +- 0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; +- int8_t int8_data[4] = { -1, 3, -5, 7 }; +- int16_t int16_data[4] = { 257, -259, 261, -263 }; +- int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 }; +- int64_t *int64_data = (int64_t *)uint64_data; +- poly8_t poly8_data[4] = { 0, 7, 13, 18, }; +- poly16_t poly16_data[4] = { 11111, 2222, 333, 44 }; +- float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; +- float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 }; ++ /* Data with which vld1_lane will overwrite one element of previous. */ ++ uint8_t uint8_data = 7; ++ uint16_t uint16_data = 257; ++ uint32_t uint32_data = 65537; ++ uint64_t uint64_data = 0xdeadbeefcafebabeULL; ++ int8_t int8_data = -1; ++ int16_t int16_data = -259; ++ int32_t int32_data = -987654321; ++ int64_t int64_data = 0x1234567890abcdefLL; ++ poly8_t poly8_data = 13; ++ poly16_t poly16_data = 11111; ++ float32_t float32_data = 3.14159; ++ float64_t float64_data = 1.010010001; + + VARIANTS (CHECK); + return 0; --- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c @@ -54,11 +54,11 @@ test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data, \ @@ -11468,31 +31103,150 @@ + return 0; \ +} + -+/* Tests of vst2_lane and vst2q_lane. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vst3_lane and vst3q_lane. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vst4_lane and vst4q_lane. */ -+VARIANTS (TESTMETH, 4) ++/* Tests of vst2_lane and vst2q_lane. 
*/ ++VARIANTS (TESTMETH, 2) ++/* Tests of vst3_lane and vst3q_lane. */ ++VARIANTS (TESTMETH, 3) ++/* Tests of vst4_lane and vst4q_lane. */ ++VARIANTS (TESTMETH, 4) ++ ++#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ ++ if (test_vst##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data)) \ ++ abort (); ++ ++int ++main (int argc, char **argv) ++{ ++ /* Original data for all vector formats. */ ++ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL, ++ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL, ++ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL, ++ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL}; ++ ++ VARIANTS (CHECK, 2); ++ VARIANTS (CHECK, 3); ++ VARIANTS (CHECK, 4); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_arm-err.c +@@ -0,0 +1,13 @@ ++/* Check that attribute target arm is rejected for M profile. */ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-march=*" } { "-march=armv6-m" } } */ ++/* { dg-add-options arm_arch_v6m } */ ++ ++int __attribute__((target("arm"))) ++foo(int a) ++{ /* { dg-error "does not support" } */ ++ return a ? 1 : 5; ++} ++ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_arm.c +@@ -0,0 +1,11 @@ ++/* Check that attribute target arm is recognized. */ ++/* { dg-do compile } */ ++/* { dg-final { scan-assembler "\\.arm" } } */ ++/* { dg-final { scan-assembler-not "\\.thumb_func" } } */ ++ ++int __attribute__((target("arm"))) ++foo(int a) ++{ ++ return a ? 1 : 5; ++} ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_thumb-static.c +@@ -0,0 +1,25 @@ ++/* Check that a change mode to a static function is correctly handled. */ ++/* { dg-do run } */ ++ ++static void ++ __attribute__((__noinline__)) ++foo (void) ++{ ++ __asm__ (""); ++} ++ ++static void ++__attribute__((__noinline__)) ++__attribute__((target("thumb"))) ++bar (void) ++{ ++ __asm__ (""); ++} ++ ++int ++main (void) ++{ ++ foo(); ++ bar(); ++ return 0; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_thumb-static2.c +@@ -0,0 +1,40 @@ ++/* Check interwork between static functions for thumb2. */ + -+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ -+ if (test_vst##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data)) \ -+ abort (); ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arch_v7a_ok } */ ++/* { dg-options "-O0 -march=armv7-a" } */ + -+int -+main (int argc, char **argv) ++struct _NSPoint +{ -+ /* Original data for all vector formats. 
*/ -+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL, -+ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL, -+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL, -+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL}; ++ float x; ++ float y; ++}; + -+ VARIANTS (CHECK, 2); -+ VARIANTS (CHECK, 3); -+ VARIANTS (CHECK, 4); -+ return 0; ++typedef struct _NSPoint NSPoint; ++ ++static NSPoint ++__attribute__ ((target("arm"))) ++NSMakePoint (float x, float y) ++{ ++ NSPoint point; ++ point.x = x; ++ point.y = y; ++ return point; ++} ++ ++static NSPoint ++__attribute__ ((target("thumb"))) ++RelativePoint (NSPoint point, NSPoint refPoint) ++{ ++ return NSMakePoint (refPoint.x + point.x, refPoint.y + point.y); ++} ++ ++NSPoint ++__attribute__ ((target("arm"))) ++g(NSPoint refPoint) ++{ ++ float pointA, pointB; ++ return RelativePoint (NSMakePoint (0, pointA), refPoint); ++} ++ ++/* { dg-final { scan-assembler-times "blx" 2 } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/attr_thumb.c +@@ -0,0 +1,15 @@ ++/* Check that attribute target thumb is recognized. */ ++/* { dg-do compile } */ ++/* Make sure the current multilib supports thumb. */ ++/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ ++/* { dg-options "-O2 -mno-restrict-it" } */ ++/* { dg-final { scan-assembler-not "\\.arm" } } */ ++/* { dg-final { scan-assembler "\\.thumb_func" } } */ ++ ++int __attribute__((target("thumb"))) ++foo(int a) ++{ ++ /* { dg-final { scan-assembler "ite" { target { arm_thumb2_ok } } } } */ ++ return a ? 1 : 5; +} ++ --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/bics_1.c @@ -0,0 +1,54 @@ @@ -11706,6 +31460,81 @@ + +/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */ +/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/fixed_float_conversion.c ++++ b/src/gcc/testsuite/gcc.target/arm/fixed_float_conversion.c +@@ -3,6 +3,7 @@ + /* { dg-require-effective-target arm_vfp3_ok } */ + /* { dg-options "-O1" } */ + /* { dg-add-options arm_vfp3 } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ + + float + fixed_to_float (int i) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/flip-thumb.c +@@ -0,0 +1,26 @@ ++/* Check -mflip-thumb. */ ++/* { dg-do compile } */ ++/* Make sure the current multilib supports thumb. */ ++/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ ++/* { dg-options "-O2 -mflip-thumb -mno-restrict-it" } */ ++/* { dg-final { scan-assembler "\\.arm" } } */ ++/* { dg-final { scan-assembler-times "\\.thumb_func" 1} } */ ++ ++int ++foo(int a) ++{ ++ return a ? 1 : 5; ++} ++ ++int ++bar(int a) ++{ ++ return a ? 
1 : 5; ++} ++ ++/* { dg-final { scan-assembler-times "ite" 1 { target { arm_thumb2_ok } } } } */ ++ ++ ++ ++ ++ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/maskdata.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options " -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++#define MASK 0xff00ff ++void maskdata (int * data, int len) ++{ ++ int i = len; ++ for (; i > 0; i -= 2) ++ { ++ data[i] &= MASK; ++ data[i + 1] &= MASK; ++ } ++} ++/* { dg-final { scan-assembler-not "65280" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/memset-inline-10.c ++++ b/src/gcc/testsuite/gcc.target/arm/memset-inline-10.c +@@ -1,5 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-march=armv7-a -mfloat-abi=hard -mfpu=neon -O2" } */ ++/* { dg-skip-if "need SIMD instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++/* { dg-skip-if "need SIMD instructions" { *-*-* } { "-mfpu=vfp*" } { "" } } */ + + #define BUF 100 + long a[BUF]; +--- a/src/gcc/testsuite/gcc.target/arm/neon-reload-class.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-reload-class.c +@@ -15,4 +15,4 @@ _op_blend_p_caa_dp(unsigned *s, unsigned* e, unsigned *d, unsigned c) { + + /* These constants should be emitted as immediates rather than loaded from memory. */ + +-/* { dg-final { scan-assembler-not "(\\.d?word|mov(w|t))" } } */ ++/* { dg-final { scan-assembler-not "(\\.d?word)" } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c @@ -58,18 +58,18 @@ GEN_COND_TESTS(vceq) @@ -11739,6 +31568,15 @@ /* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ /* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ /* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ +--- a/src/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c ++++ b/src/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c +@@ -1,5 +1,6 @@ + /* { dg-do compile } */ + /* { dg-require-effective-target arm_thumb2_ok } */ ++/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-march=*" "-mcpu=*" } { "-mcpu=cortex-m7" } } */ + /* { dg-options "-Os -mthumb -mcpu=cortex-m7" } */ + + int --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr26702.c @@ -0,0 +1,4 @@ @@ -11755,6 +31593,54 @@ -/* { dg-final { scan-assembler-times "ldr" 1 } } */ +/* { dg-final { scan-assembler-times "str" 1 } } */ --- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr43404.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++__attribute__ ((naked)) ++void __data_abort(void) ++{ ++ long foo; /* { dg-error "cannot allocate stack for variable" } */ ++ long* bar = &foo; ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr48470.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++extern void g(int *x); ++ ++void __attribute__((naked)) f(void) ++{ ++ int x = 0; /* { dg-error "cannot allocate stack for variable" } */ ++ g(&x); ++} +--- a/src/gcc/testsuite/gcc.target/arm/pr58784.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr58784.c +@@ -1,6 +1,8 @@ + /* { dg-do compile } */ + /* { dg-skip-if "incompatible options" { arm_thumb1 } { "*" } { "" } } */ + /* { dg-options "-march=armv7-a -mfloat-abi=hard -mfpu=neon -marm -O2" } */ ++/* { dg-skip-if "need hardfp ABI" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++ + 
+ typedef struct __attribute__ ((__packed__)) + { +--- a/src/gcc/testsuite/gcc.target/arm/pr59985.C ++++ b/src/gcc/testsuite/gcc.target/arm/pr59985.C +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-skip-if "incompatible options" { arm_thumb1 } { "*" } { "" } } */ + /* { dg-options "-g -fcompare-debug -O2 -march=armv7-a -mtune=cortex-a9 -mfpu=vfpv3-d16 -mfloat-abi=hard" } */ ++/* { dg-skip-if "need hardfp abi" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ + + extern void *f1 (unsigned long, unsigned long); + extern const struct line_map *f2 (void *, int, unsigned int, const char *, unsigned int); +--- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr64208.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ @@ -11801,6 +31687,65 @@ +/* { dg-final { scan-rtl-dump "GLOBAL COPY-PROP" "cprop2" } } */ +/* { dg-final { cleanup-rtl-dump "cprop2" } } */ --- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr64744-1.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++__attribute__((naked)) ++void foo1 () ++{ ++ int aa = 0; ++ int ab = {0}; ++} ++ ++__attribute__((naked)) ++void foo2() { ++ char aa [ ] = {}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ab [1] = {}; ++ char ac [2] = {}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ad [3] = {}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++ ++__attribute__((naked)) ++void foo3() { ++ char aa [1] = {0}; ++ char ab [2] = {0}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ac [3] = {0}; /* { dg-error "cannot allocate stack for variable" } */ ++ char ad [4] = {0}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++ ++__attribute__((naked)) ++void foo4() { ++ char aa [2] = {0,0}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++__attribute__((naked)) ++void foo5() { ++ char aa [3] = {0,0,0}; /* { dg-error "cannot allocate stack for variable" } */ ++} ++ ++__attribute__((naked)) ++void foo6() { ++ char aa [4] = {0,0,0,0}; /* { dg-error "cannot allocate stack for variable" } */ ++} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pr64744-2.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target naked_functions } */ ++/* { dg-options "-O0" } */ ++ ++struct s { ++ char a; ++ int b; ++}; ++ ++__attribute__((naked)) ++void foo () { ++ struct s x = {}; /* { dg-error "cannot allocate stack for variable" } */ ++} +--- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr64818.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ @@ -11841,6 +31786,15 @@ /* { dg-options "-mthumb -mcpu=cortex-m3 -O2" } */ struct tmp { +--- a/src/gcc/testsuite/gcc.target/arm/pr65647.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr65647.c +@@ -1,4 +1,6 @@ + /* { dg-do compile } */ ++/* { dg-require-effective-target arm_arch_v6m_ok } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } {"-mfloat-abi=soft" } } */ + /* { dg-options "-march=armv6-m -mthumb -O3 -w -mfloat-abi=soft" } */ + + a, b, c, e, g = &e, h, i = 7, l = 1, m, n, o, q = &m, r, s = &r, u, w = 9, x, --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/pr65710.c @@ -0,0 +1,120 @@ @@ -11989,6 +31943,145 @@ + if (b + a < 0) + c = 0; +} +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/pragma_attribute.c +@@ -0,0 +1,35 @@ ++/* Test for #prama target macros. 
*/ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_thumb1_ok } */ ++ ++#pragma GCC target ("thumb") ++ ++#ifndef __thumb__ ++#error "__thumb__ is not defined" ++#endif ++ ++#ifdef __thumb2__ ++#ifndef __ARM_32BIT_STATE ++#error "__ARM_32BIT_STATE is not defined" ++#endif ++#else /* thumb1 */ ++#ifdef __ARM_32BIT_STATE ++#error "__ARM_32BIT_STATE is defined" ++#endif ++#endif /* thumb1 */ ++ ++#pragma GCC target ("arm") ++ ++#ifdef __thumb__ ++#error "__thumb__ is defined" ++#endif ++ ++#if defined (__thumb2__) || defined (__thumb1__) ++#error "thumb is defined" ++#endif ++ ++#ifndef __ARM_32BIT_STATE ++#error "__ARM_32BIT_STATE is not defined" ++#endif ++ ++#pragma GCC reset_options +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/reg_equal_test.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -fdump-rtl-expand" } */ ++ ++extern void abort (void); ++unsigned int a = 1; ++ ++int ++main (void) ++{ ++ unsigned int b, c, d; ++ ++ if (sizeof (int) != 4 || (int) 0xc7d24b5e > 0) ++ return 0; ++ ++ c = 0xc7d24b5e; ++ d = a | -2; ++ b = (d == 0) ? c : (c % d); ++ if (b != c) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-rtl-dump "expr_list:REG_EQUAL \\(const_int -942519458" "expand" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/scd42-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/scd42-1.c +@@ -1,7 +1,8 @@ + /* Verify that mov is preferred on XScale for loading a 1 byte constant. */ + /* { dg-do compile } */ + /* { dg-skip-if "incompatible options" { arm*-*-* } { "-march=*" } { "" } } */ +-/* { dg-options "-mcpu=xscale -O" } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ ++/* { dg-options "-mcpu=xscale -O -mfloat-abi=softfp" } */ + + unsigned load1(void) __attribute__ ((naked)); + unsigned load1(void) +--- a/src/gcc/testsuite/gcc.target/arm/scd42-3.c ++++ b/src/gcc/testsuite/gcc.target/arm/scd42-3.c +@@ -2,7 +2,8 @@ + /* { dg-do compile } */ + /* { dg-skip-if "Test is specific to Xscale" { arm*-*-* } { "-march=*" } { "-march=xscale" } } */ + /* { dg-skip-if "Test is specific to Xscale" { arm*-*-* } { "-mcpu=*" } { "-mcpu=xscale" } } */ +-/* { dg-options "-mcpu=xscale -O" } */ ++/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ ++/* { dg-options "-mcpu=xscale -O -mfloat-abi=softfp" } */ + + unsigned load4(void) __attribute__ ((naked)); + unsigned load4(void) +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/short-it-ifcvt-1.c +@@ -0,0 +1,23 @@ ++/* Test that ifcvt is not being too aggressive when -mrestrict-it. */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mrestrict-it" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++f1(int x, int y, int z) ++{ ++ if (x > 100) ++ { ++ x++; ++ z = -z; ++ } ++ else ++ { ++ x = -x; ++ y = -y; ++ z = 1; ++ } ++ return x + y + z; ++} ++ ++/* { dg-final { scan-assembler "b(gt|le)" } } */ +--- a/src//dev/null ++++ b/src/gcc/testsuite/gcc.target/arm/short-it-ifcvt-2.c +@@ -0,0 +1,21 @@ ++/* Test that ifcvt is not being too aggressive when -mrestrict-it. 
*/ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mrestrict-it" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++f1(int x, int y, int z) ++{ ++ if (x > 100) ++ { ++ x++; ++ z = -z; ++ } ++ else ++ { ++ x = -x; ++ y = -y; ++ } ++ return x + y + z; ++} ++/* { dg-final { scan-assembler "b(gt|le)" } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp +++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp @@ -27,9 +27,22 @@ load_lib gcc-dg.exp @@ -13152,6 +33245,17 @@ /* { dg-options "-save-temps -O1 -fno-inline" } */ /* { dg-add-options arm_neon } */ +--- a/src/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c +@@ -1,7 +1,7 @@ + /* Check that Thumb 16-bit shifts can be if-converted. */ + /* { dg-do compile } */ + /* { dg-require-effective-target arm_thumb2_ok } */ +-/* { dg-options "-O2 -mthumb" } */ ++/* { dg-options "-O2 -mthumb -mno-restrict-it" } */ + + int + foo (int a, int b) --- a/src//dev/null +++ b/src/gcc/testsuite/gcc.target/arm/unsigned-float.c @@ -0,0 +1,20 @@ @@ -13175,6 +33279,47 @@ +} + +/* { dg-final { scan-assembler-not "vcvt.(f32.f64|f64.f32)" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */ ++/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp -ffp-contract=off" } */ + /* { dg-require-effective-target arm_vfp_ok } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ + + extern float fabsf (float); + extern float sqrtf (float); +--- a/src/gcc/testsuite/lib/target-supports.exp ++++ b/src/gcc/testsuite/lib/target-supports.exp +@@ -4675,6 +4675,27 @@ proc check_effective_target_vect_call_copysignf { } { + return $et_vect_call_copysignf_saved + } + ++# Return 1 if the target supports hardware square root instructions. ++ ++proc check_effective_target_sqrt_insn { } { ++ global et_sqrt_insn_saved ++ ++ if [info exists et_sqrt_insn_saved] { ++ verbose "check_effective_target_hw_sqrt: using cached result" 2 ++ } else { ++ set et_sqrt_insn_saved 0 ++ if { [istarget x86_64-*-*] ++ || [istarget powerpc*-*-*] ++ || [istarget aarch64*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_vfp_ok]) } { ++ set et_sqrt_insn_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_hw_sqrt: returning et_sqrt_insn_saved" 2 ++ return $et_sqrt_insn_saved ++} ++ + # Return 1 if the target supports vector sqrtf calls. + + proc check_effective_target_vect_call_sqrtf { } { --- a/src/gcc/tree-ssa-loop-ivopts.c +++ b/src/gcc/tree-ssa-loop-ivopts.c @@ -226,6 +226,7 @@ struct cost_pair @@ -14232,9 +34377,239 @@ /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into powi(x, n/3) * powi(cbrt(x), n%3), n > 0; +--- a/src/gcc/tree-ssa-phiopt.c ++++ b/src/gcc/tree-ssa-phiopt.c +@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see + static unsigned int tree_ssa_phiopt_worker (bool, bool); + static bool conditional_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); ++static bool factor_out_conditional_conversion (edge, edge, gphi *, tree, tree); + static int value_replacement (basic_block, basic_block, + edge, edge, gimple, tree, tree); + static bool minmax_replacement (basic_block, basic_block, +@@ -356,6 +357,19 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) + node. 
*/ + gcc_assert (arg0 != NULL && arg1 != NULL); + ++ if (factor_out_conditional_conversion (e1, e2, phi, arg0, arg1)) ++ { ++ /* factor_out_conditional_conversion may create a new PHI in ++ BB2 and eliminate an existing PHI in BB2. Recompute values ++ that may be affected by that change. */ ++ phis = phi_nodes (bb2); ++ phi = single_non_singleton_phi_for_edges (phis, e1, e2); ++ gcc_assert (phi); ++ arg0 = gimple_phi_arg_def (phi, e1->dest_idx); ++ arg1 = gimple_phi_arg_def (phi, e2->dest_idx); ++ gcc_assert (arg0 != NULL && arg1 != NULL); ++ } ++ + /* Do the replacement of conditional if it can be done. */ + if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -431,6 +445,134 @@ replace_phi_edge_with_variable (basic_block cond_block, + bb->index); + } + ++/* PR66726: Factor conversion out of COND_EXPR. If the arguments of the PHI ++ stmt are CONVERT_STMT, factor out the conversion and perform the conversion ++ to the result of PHI stmt. */ ++ ++static bool ++factor_out_conditional_conversion (edge e0, edge e1, gphi *phi, ++ tree arg0, tree arg1) ++{ ++ gimple arg0_def_stmt = NULL, arg1_def_stmt = NULL, new_stmt; ++ tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE; ++ tree temp, result; ++ gphi *newphi; ++ gimple_stmt_iterator gsi, gsi_for_def; ++ source_location locus = gimple_location (phi); ++ enum tree_code convert_code; ++ ++ /* Handle only PHI statements with two arguments. TODO: If all ++ other arguments to PHI are INTEGER_CST or if their defining ++ statement have the same unary operation, we can handle more ++ than two arguments too. */ ++ if (gimple_phi_num_args (phi) != 2) ++ return false; ++ ++ /* First canonicalize to simplify tests. */ ++ if (TREE_CODE (arg0) != SSA_NAME) ++ { ++ std::swap (arg0, arg1); ++ std::swap (e0, e1); ++ } ++ ++ if (TREE_CODE (arg0) != SSA_NAME ++ || (TREE_CODE (arg1) != SSA_NAME ++ && TREE_CODE (arg1) != INTEGER_CST)) ++ return false; ++ ++ /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is ++ a conversion. */ ++ arg0_def_stmt = SSA_NAME_DEF_STMT (arg0); ++ if (!is_gimple_assign (arg0_def_stmt) ++ || !gimple_assign_cast_p (arg0_def_stmt)) ++ return false; ++ ++ /* Use the RHS as new_arg0. */ ++ convert_code = gimple_assign_rhs_code (arg0_def_stmt); ++ new_arg0 = gimple_assign_rhs1 (arg0_def_stmt); ++ if (convert_code == VIEW_CONVERT_EXPR) ++ new_arg0 = TREE_OPERAND (new_arg0, 0); ++ ++ if (TREE_CODE (arg1) == SSA_NAME) ++ { ++ /* Check if arg1 is an SSA_NAME and the stmt which defines arg1 ++ is a conversion. */ ++ arg1_def_stmt = SSA_NAME_DEF_STMT (arg1); ++ if (!is_gimple_assign (arg1_def_stmt) ++ || gimple_assign_rhs_code (arg1_def_stmt) != convert_code) ++ return false; ++ ++ /* Use the RHS as new_arg1. */ ++ new_arg1 = gimple_assign_rhs1 (arg1_def_stmt); ++ if (convert_code == VIEW_CONVERT_EXPR) ++ new_arg1 = TREE_OPERAND (new_arg1, 0); ++ } ++ else ++ { ++ /* If arg1 is an INTEGER_CST, fold it to new type. */ ++ if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0)) ++ && int_fits_type_p (arg1, TREE_TYPE (new_arg0))) ++ { ++ if (gimple_assign_cast_p (arg0_def_stmt)) ++ new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1); ++ else ++ return false; ++ } ++ else ++ return false; ++ } ++ ++ /* If arg0/arg1 have > 1 use, then this transformation actually increases ++ the number of expressions evaluated at runtime. */ ++ if (!has_single_use (arg0) ++ || (arg1_def_stmt && !has_single_use (arg1))) ++ return false; ++ ++ /* If types of new_arg0 and new_arg1 are different bailout. 
*/ ++ if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1))) ++ return false; ++ ++ /* Create a new PHI stmt. */ ++ result = PHI_RESULT (phi); ++ temp = make_ssa_name (TREE_TYPE (new_arg0), NULL); ++ newphi = create_phi_node (temp, gimple_bb (phi)); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "PHI "); ++ print_generic_expr (dump_file, gimple_phi_result (phi), 0); ++ fprintf (dump_file, ++ " changed to factor conversion out from COND_EXPR.\n"); ++ fprintf (dump_file, "New stmt with CAST that defines "); ++ print_generic_expr (dump_file, result, 0); ++ fprintf (dump_file, ".\n"); ++ } ++ ++ /* Remove the old cast(s) that has single use. */ ++ gsi_for_def = gsi_for_stmt (arg0_def_stmt); ++ gsi_remove (&gsi_for_def, true); ++ if (arg1_def_stmt) ++ { ++ gsi_for_def = gsi_for_stmt (arg1_def_stmt); ++ gsi_remove (&gsi_for_def, true); ++ } ++ ++ add_phi_arg (newphi, new_arg0, e0, locus); ++ add_phi_arg (newphi, new_arg1, e1, locus); ++ ++ /* Create the conversion stmt and insert it. */ ++ if (convert_code == VIEW_CONVERT_EXPR) ++ temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp); ++ new_stmt = gimple_build_assign (result, convert_code, temp); ++ gsi = gsi_after_labels (gimple_bb (phi)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ ++ /* Remove he original PHI stmt. */ ++ gsi = gsi_for_stmt (phi); ++ gsi_remove (&gsi, true); ++ return true; ++} ++ + /* The function conditional_replacement does the main work of doing the + conditional replacement. Return true if the replacement is done. + Otherwise return false. +@@ -2173,6 +2315,26 @@ gate_hoist_loads (void) + This pass also performs a fifth transformation of a slightly different + flavor. + ++ Factor conversion in COND_EXPR ++ ------------------------------ ++ ++ This transformation factors the conversion out of COND_EXPR with ++ factor_out_conditional_conversion. ++ ++ For example: ++ if (a <= CST) goto ; else goto ; ++ : ++ tmp = (int) a; ++ : ++ tmp = PHI ++ ++ Into: ++ if (a <= CST) goto ; else goto ; ++ : ++ : ++ a = PHI ++ tmp = (int) a; ++ + Adjacent Load Hoisting + ---------------------- + +--- a/src/gcc/tree-vrp.c ++++ b/src/gcc/tree-vrp.c +@@ -3145,14 +3145,33 @@ extract_range_from_binary_expr_1 (value_range_t *vr, + and all numbers from min to 0 for negative min. */ + cmp = compare_values (vr0.max, zero); + if (cmp == -1) +- max = zero; ++ { ++ /* When vr0.max < 0, vr1.min != 0 and value ++ ranges for dividend and divisor are available. */ ++ if (vr1.type == VR_RANGE ++ && !symbolic_range_p (&vr0) ++ && !symbolic_range_p (&vr1) ++ && !compare_values (vr1.min, zero)) ++ max = int_const_binop (code, vr0.max, vr1.min); ++ else ++ max = zero; ++ } + else if (cmp == 0 || cmp == 1) + max = vr0.max; + else + type = VR_VARYING; + cmp = compare_values (vr0.min, zero); + if (cmp == 1) +- min = zero; ++ { ++ /* For unsigned division when value ranges for dividend ++ and divisor are available. 
*/ ++ if (vr1.type == VR_RANGE ++ && !symbolic_range_p (&vr0) ++ && !symbolic_range_p (&vr1)) ++ min = int_const_binop (code, vr0.min, vr1.max); ++ else ++ min = zero; ++ } + else if (cmp == 0 || cmp == -1) + min = vr0.min; + else --- a/src/libgcc/config.host +++ b/src/libgcc/config.host -@@ -377,14 +377,15 @@ arm*-*-netbsdelf*) +@@ -382,14 +382,15 @@ arm*-*-netbsdelf*) tmake_file="$tmake_file arm/t-arm arm/t-netbsd t-slibgcc-gld-nover" ;; arm*-*-linux*) # ARM GNU/Linux with ELF @@ -14252,7 +34627,7 @@ tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" tmake_file="${tmake_file} arm/t-bpabi" tm_file="$tm_file arm/bpabi-lib.h" -@@ -396,7 +397,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems*) +@@ -401,7 +402,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems*) tm_file="$tm_file arm/bpabi-lib.h" case ${host} in arm*-*-eabi* | arm*-*-rtems*) diff -u gcc-5-5.2.1/debian/patches/gdc-5-doc.diff gcc-5-5.2.1/debian/patches/gdc-5-doc.diff --- gcc-5-5.2.1/debian/patches/gdc-5-doc.diff +++ gcc-5-5.2.1/debian/patches/gdc-5-doc.diff @@ -22,25 +22,6 @@ The abbreviation @dfn{GCC} has multiple meanings in common use. The current official meaning is ``GNU Compiler Collection'', which refers -Index: b/src/gcc/doc/install.texi -=================================================================== ---- a/src/gcc/doc/install.texi -+++ b/src/gcc/doc/install.texi -@@ -1547,12 +1547,12 @@ their runtime libraries should be built. - grep language= */config-lang.in - @end smallexample - Currently, you can use any of the following: --@code{all}, @code{ada}, @code{c}, @code{c++}, @code{fortran}, -+@code{all}, @code{ada}, @code{c}, @code{c++}, @code{d}, @code{fortran}, - @code{go}, @code{java}, @code{objc}, @code{obj-c++}. - Building the Ada compiler has special requirements, see below. - If you do not pass this flag, or specify the option @code{all}, then all - default languages available in the @file{gcc} sub-tree will be configured. --Ada, Go and Objective-C++ are not default languages; the rest are. -+Ada, D, Go and Objective-C++ are not default languages; the rest are. - - @item --enable-stage1-languages=@var{lang1},@var{lang2},@dots{} - Specify that a particular subset of compilers and their runtime Index: b/src/gcc/doc/invoke.texi =================================================================== --- a/src/gcc/doc/invoke.texi diff -u gcc-5-5.2.1/debian/patches/pr67736.diff gcc-5-5.2.1/debian/patches/pr67736.diff --- gcc-5-5.2.1/debian/patches/pr67736.diff +++ gcc-5-5.2.1/debian/patches/pr67736.diff @@ -1,15 +1,13 @@ -2012-05-06 Andrew Pinski - - * combine.c (simplify_comparison): Use gen_lowpart_or_truncate instead - of gen_lowpart when we had a truncating and. +2015-10-23 Steve Ellcey + Andrew Pinski - * gcc.c-torture/execute/20110418-1.c: New testcase. + PR rtl-optimization/67736 + * combine.c (simplify_comparison): Use gen_lowpart_or_truncate instead + of gen_lowpart. 
-Index: a/src/gcc/combine.c -=================================================================== ---- a/src/gcc/combine.c (revision 187203) -+++ a/src/gcc/combine.c (working copy) -@@ -11199,8 +11199,8 @@ simplify_comparison (enum rtx_code code, +--- a/src/gcc/combine.c ++++ a/src/gcc/combine.c +@@ -11530,8 +11530,8 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1) tmode != GET_MODE (op0); tmode = GET_MODE_WIDER_MODE (tmode)) if ((unsigned HOST_WIDE_INT) c0 == GET_MODE_MASK (tmode)) { @@ -23,34 +21,15 @@ -Index: a/src/gcc/testsuite/gcc.c-torture/execute/20110418-1.c -=================================================================== ---- a/src/gcc/testsuite/gcc.c-torture/execute/20110418-1.c (revision 0) -+++ a/src/gcc/testsuite/gcc.c-torture/execute/20110418-1.c (revision 0) -@@ -0,0 +1,29 @@ -+typedef unsigned long long uint64_t; -+void f(uint64_t *a, uint64_t aa) __attribute__((noinline)); -+void f(uint64_t *a, uint64_t aa) -+{ -+ uint64_t new_value = aa; -+ uint64_t old_value = *a; -+ int bit_size = 32; -+ uint64_t mask = (uint64_t)(unsigned)(-1); -+ uint64_t tmp = old_value & mask; -+ new_value &= mask; -+ /* On overflow we need to add 1 in the upper bits */ -+ if (tmp > new_value) -+ new_value += 1ull<= 0 + && const_op >> i == 0 +- && (tmode = mode_for_size (i, MODE_INT, 1)) != BLKmode +- && (TRULY_NOOP_TRUNCATION_MODES_P (tmode, GET_MODE (op0)) +- || (REG_P (XEXP (op0, 0)) +- && reg_truncated_to_mode (tmode, XEXP (op0, 0))))) ++ && (tmode = mode_for_size (i, MODE_INT, 1)) != BLKmode) + { +- op0 = gen_lowpart (tmode, XEXP (op0, 0)); ++ op0 = gen_lowpart_or_truncate (tmode, XEXP (op0, 0)); + continue; + } + diff -u gcc-5-5.2.1/debian/patches/svn-updates.diff gcc-5-5.2.1/debian/patches/svn-updates.diff --- gcc-5-5.2.1/debian/patches/svn-updates.diff +++ gcc-5-5.2.1/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 5 branch upto 20151021 (r229103). +# DP: updates from the 5 branch upto 20151028 (r229478). last_update() { cat > ${dir}LAST_UPDATED ++ ++ Backport from mainline ++ 2015-10-26 Kyrylo Tkachov ++ ++ PR middle-end/67989 ++ * optabs.c (expand_atomic_compare_and_swap): Handle case when ++ ptarget_oval or ptarget_bool are const0_rtx. ++ ++2015-10-27 Kyrylo Tkachov ++ ++ PR target/67929 ++ * config/arm/arm.c (vfp3_const_double_for_bits): Rewrite. ++ * config/arm/constraints.md (Dp): Update callsite. ++ * config/arm/predicates.md (const_double_vcvt_power_of_two): Likewise. ++ ++2015-10-27 Martin Jambor ++ ++ * tree-sra.c (replace_removed_params_ssa_names): Change type of ++ parameter stmt to gimple. ++ ++2015-10-26 Martin Jambor ++ ++ Backport from mainline ++ 2015-10-09 Martin Jambor ++ ++ PR tree-optimization/67794 ++ * tree-sra.c (replace_removed_params_ssa_names): Do not distinguish ++ between types of statements but accept original definitions as a ++ parameter. ++ (ipa_sra_modify_function_body): Use FOR_EACH_SSA_DEF_OPERAND to ++ iterate over definitions. ++ ++2015-10-25 John David Anglin ++ ++ PR middle-end/68079 ++ * dojump.c (do_compare_and_jump): Canonicalize both function and ++ method types. ++ ++2015-10-22 Andreas Krebbel ++ ++ Backport from mainline ++ 2015-10-22 Andreas Krebbel ++ ++ PR target/68015 ++ * config/s390/s390.md (movcc): Emit compare only if we don't ++ already have a comparison result. 
++ ++2015-10-22 Uros Bizjak ++ ++ Backport from mainline ++ 2015-10-21 Uros Bizjak ++ ++ PR target/68018 ++ * config/i386/i386.c (ix86_compute_frame_layout): Realign the stack ++ for 64-bit MS_ABI targets also when default incoming stack boundary ++ is overriden. ++ +2015-10-20 Szabolcs Nagy + + Backport from mainline r229024 @@ -17343,7 +17453,7 @@ + Backport from mainline + 2015-08-21 Dominik Vogt + -+ * config/s390/s390-builtins.def: Fix value range of vec_load_bndry. ++ * config/s390/s390-builtins.def: Fix value range of vec_load_bndry. + +2015-08-24 Michael Meissner + @@ -17872,7 +17982,7 @@ 2015-07-16 Release Manager * GCC 5.2.0 released. -@@ -119,8 +1389,8 @@ +@@ -119,8 +1453,8 @@ 2015-07-09 Iain Sandoe PR target/66523 @@ -17883,7 +17993,7 @@ 2015-07-06 Alan Lawrence -@@ -181,21 +1451,8 @@ +@@ -181,21 +1515,8 @@ * doc/invoke.texi (i386 and x86-64 Options): -mno-fancy-math-387 is not actually the default on FreeBSD. @@ -18112,6 +18222,33 @@ + /* { dg-final { scan-assembler-not "vnmul\\.f32" } } */ + return -a * b; +} +Index: gcc/testsuite/gcc.target/arm/pr67929_1.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/arm/pr67929_1.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.target/arm/pr67929_1.c (.../branches/gcc-5-branch) +@@ -0,0 +1,21 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_vfp3_ok } */ ++/* { dg-options "-O2 -fno-inline" } */ ++/* { dg-add-options arm_vfp3 } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++ ++int ++foo (float a) ++{ ++ return a * 4.9f; ++} ++ ++ ++int ++main (void) ++{ ++ if (foo (10.0f) != 49) ++ __builtin_abort (); ++ ++ return 0; ++} +\ No newline at end of file Index: gcc/testsuite/gcc.target/arm/pr66912.c =================================================================== --- a/src/gcc/testsuite/gcc.target/arm/pr66912.c (.../tags/gcc_5_2_0_release) @@ -19342,6 +19479,21 @@ /* { dg-options "-std=gnu99 -mpreferred-stack-boundary=4" } */ int outer_function (int x, int y) +Index: gcc/testsuite/gcc.target/i386/pr68018.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/pr68018.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.target/i386/pr68018.c (.../branches/gcc-5-branch) +@@ -0,0 +1,10 @@ ++/* { dg-do compile { target { *-*-linux* && { ! 
ia32 } } } } */ ++/* { dg-options "-O -mabi=ms -mstackrealign" } */ ++ ++typedef float V __attribute__ ((vector_size (16))); ++ ++int fn1 (V * x) ++{ ++ V a = *x; ++ return a[0]; ++} Index: gcc/testsuite/gcc.target/i386/pr66814.c =================================================================== --- a/src/gcc/testsuite/gcc.target/i386/pr66814.c (.../tags/gcc_5_2_0_release) @@ -19524,6 +19676,35 @@ + *d = 1; + goto *a; +} +Index: gcc/testsuite/gcc.target/s390/pr68015.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/s390/pr68015.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.target/s390/pr68015.c (.../branches/gcc-5-branch) +@@ -0,0 +1,24 @@ ++/* { dg-compile } */ ++/* { dg-options "-O2 -march=z196" } */ ++ ++extern long useme (long, ...); ++ ++void ++foo (void) ++{ ++ long secs = useme (41); ++ long utc_secs = useme (42); ++ long h, m; ++ ++ utc_secs = useme (42); ++ h = secs / 3600; ++ m = secs / 60; ++ if (utc_secs >= 86400) ++ { ++ m = 59; ++ h--; ++ if (h < 0) ++ h = 23; ++ } ++ useme (h, m); ++} Index: gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c =================================================================== --- a/src/gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c (.../tags/gcc_5_2_0_release) @@ -20025,6 +20206,35 @@ + type(t), parameter :: vec(*) = [(t(i), i = 1, 4)] + type(t), parameter :: arr(*) = reshape(vec, [2, 2]) ! { dg-error "ranks 1 and 2 in assignment" } +end +Index: gcc/testsuite/gfortran.dg/pr58754.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/pr58754.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/pr58754.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,24 @@ ++! { dg-do compile } ++! ++! Tests the fix for PR58754 ++! ++ type :: char_type ++ character, allocatable :: chr (:) ++ end type ++ character, allocatable :: c(:) ++ type(char_type) :: d ++ character :: t(1) = ["w"] ++ ++ allocate (c (1), source = t) ++ if (any (c .ne. t)) call abort ++ c = ["a"] ++ if (any (c .ne. ["a"])) call abort ++ deallocate (c) ++ ++! Check allocatable character components, whilst we are about it. ++ allocate (d%chr (2), source = [t, char (ichar (t) + 1)]) ++ if (any (d%chr .ne. ["w", "x"])) call abort ++ d%chr = ["a","b","c","d"] ++ if (any (d%chr .ne. ["a","b","c","d"])) call abort ++ deallocate (d%chr) ++end Index: gcc/testsuite/gfortran.dg/pr56520.f90 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/pr56520.f90 (.../tags/gcc_5_2_0_release) @@ -20386,6 +20596,55 @@ + data x /t()/ ! Prior to patch, this would ICE. + end block +end +Index: gcc/testsuite/gfortran.dg/move_alloc_16.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/move_alloc_16.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/move_alloc_16.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,44 @@ ++! { dg-do run } ++! ++! Tests the fix for PR67177 in which MOVE_ALLOC was not assigning the string ++! length for deferred length characters. ++! ++! Contributed by ++! 
++program str ++ implicit none ++ ++ type string ++ character(:), Allocatable :: text ++ end type string ++ ++ type strings ++ type(string), allocatable, dimension(:) :: strlist ++ end type strings ++ ++ type(strings) :: teststrs ++ type(string) :: tmpstr ++ integer :: strlen = 20 ++ ++ allocate (teststrs%strlist(1)) ++ allocate (character(len=strlen) :: tmpstr%text) ++ ++ allocate (character(len=strlen) :: teststrs%strlist(1)%text) ++ ++! Full string reference was required because reallocation on assignment is ++! functioning when it should not if the lhs is a substring - PR67977 ++ tmpstr%text(1:3) = 'foo' ++ ++ if (.not.allocated (teststrs%strlist(1)%text)) call abort ++ if (len (tmpstr%text) .ne. strlen) call abort ++ ++ call move_alloc(tmpstr%text,teststrs%strlist(1)%text) ++ ++ if (.not.allocated (teststrs%strlist(1)%text)) call abort ++ if (len (teststrs%strlist(1)%text) .ne. strlen) call abort ++ if (trim (teststrs%strlist(1)%text(1:3)) .ne. 'foo') call abort ++ ++! Clean up so that valgrind reports all allocated memory freed. ++ if (allocated (teststrs%strlist(1)%text)) deallocate (teststrs%strlist(1)%text) ++ if (allocated (teststrs%strlist)) deallocate (teststrs%strlist) ++end program str Index: gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_1.f03 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_1.f03 (.../tags/gcc_5_2_0_release) @@ -20661,6 +20920,33 @@ + +end program alloc_comp_copy_test + +Index: gcc/testsuite/gfortran.dg/deferred_character_assignment_1.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/deferred_character_assignment_1.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/deferred_character_assignment_1.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,22 @@ ++! { dg-do run } ++! ++! Checks the fix for PR67977 in which automatic reallocation on assignment ++! was performed when the lhs had a substring reference. ++! ++! Contributed by Anton Shterenlikht ++! ++ character(:), allocatable :: z ++ integer :: length ++ z = "cockatoo" ++ length = len (z) ++ z(:) = '' ++ if (len(z) .ne. length) call abort ++ if (trim (z) .ne. '') call abort ++ z(:3) = "foo" ++ if (len(z) .ne. length) call abort ++ if (trim (z) .ne. "foo") call abort ++ z(4:) = "__bar" ++ if (len(z) .ne. length) call abort ++ if (trim (z) .ne. "foo__bar") call abort ++ deallocate (z) ++end Index: gcc/testsuite/gfortran.dg/coarray_collectives_16.f90 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/coarray_collectives_16.f90 (.../tags/gcc_5_2_0_release) @@ -20775,6 +21061,99 @@ + end select + end subroutine +end program +Index: gcc/testsuite/gfortran.dg/move_alloc_15.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/move_alloc_15.f90 (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gfortran.dg/move_alloc_15.f90 (.../branches/gcc-5-branch) +@@ -0,0 +1,88 @@ ++! { dg-do run } ++! { dg-options "-fdump-tree-original" } ++! ++! Fix for PR...... ++! ++! The 'to' components of 'mytemp' would remain allocated after the call to ++! MOVE_ALLOC, resulting in memory leaks. ++! ++! Contributed by Alberto Luaces. ++! ++! See https://groups.google.com/forum/#!topic/comp.lang.fortran/k3bkKUbOpFU ++! 
++module alloctest ++ type myallocatable ++ integer, allocatable:: i(:) ++ end type myallocatable ++ ++contains ++ subroutine f(num, array) ++ implicit none ++ integer, intent(in) :: num ++ integer :: i ++ type(myallocatable):: array(:) ++ ++ do i = 1, num ++ allocate(array(i)%i(5), source = [1,2,3,4,5]) ++ end do ++ ++ end subroutine f ++end module alloctest ++ ++program name ++ use alloctest ++ implicit none ++ type(myallocatable), allocatable:: myarray(:), mytemp(:) ++ integer, parameter:: OLDSIZE = 7, NEWSIZE = 20 ++ logical :: flag ++ ++ allocate(myarray(OLDSIZE)) ++ call f(size(myarray), myarray) ++ ++ allocate(mytemp(NEWSIZE)) ++ mytemp(1:OLDSIZE) = myarray ++ ++ flag = .false. ++ call foo ++ call bar ++ ++ deallocate(myarray) ++ if (allocated (mytemp)) deallocate (mytemp) ++ ++ allocate(myarray(OLDSIZE)) ++ call f(size(myarray), myarray) ++ ++ allocate(mytemp(NEWSIZE)) ++ mytemp(1:OLDSIZE) = myarray ++ ++! Verfify that there is no segfault if the allocatable components ++! are deallocated before the call to move_alloc ++ flag = .true. ++ call foo ++ call bar ++ ++ deallocate(myarray) ++contains ++ subroutine foo ++ integer :: i ++ if (flag) then ++ do i = 1, OLDSIZE ++ deallocate (mytemp(i)%i) ++ end do ++ end if ++ call move_alloc(mytemp, myarray) ++ end subroutine ++ ++ subroutine bar ++ integer :: i ++ do i = 1, OLDSIZE ++ if (.not.flag .and. allocated (myarray(i)%i)) then ++ if (any (myarray(i)%i .ne. [1,2,3,4,5])) call abort ++ else ++ if (.not.flag) call abort ++ end if ++ end do ++ end subroutine ++end program name ++! { dg-final { scan-tree-dump-times "__builtin_malloc" 11 "original" } } ++! { dg-final { scan-tree-dump-times "__builtin_free" 11 "original" } } Index: gcc/testsuite/gfortran.dg/char_length_2.f90 =================================================================== --- a/src/gcc/testsuite/gfortran.dg/char_length_2.f90 (.../tags/gcc_5_2_0_release) @@ -21711,6 +22090,26 @@ + asm volatile ("" : : "g" (b) : "memory"); + return 0; +} +Index: gcc/testsuite/gcc.dg/torture/pr67794.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/torture/pr67794.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.dg/torture/pr67794.c (.../branches/gcc-5-branch) +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++int *b; ++static void fn1(int *best, int *dmin) { ++ int a[64]; ++ dmin = a; ++ __asm__ volatile("" : "+&r"(dmin) : ""(best)); ++} ++ ++__attribute__((always_inline)) static inline void fn2(int *best) { fn1(best, b); } ++ ++void fn3(void) { ++ int c[1]; ++ fn2(c); ++} Index: gcc/testsuite/gcc.dg/torture/pr67442.c =================================================================== --- a/src/gcc/testsuite/gcc.dg/torture/pr67442.c (.../tags/gcc_5_2_0_release) @@ -22029,6 +22428,45 @@ + fn1 (**f); + return 0; +} +Index: gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c (.../branches/gcc-5-branch) +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fipa-sra -fdump-tree-eipa_sra-details" } */ ++ ++extern void consume (int); ++extern int glob, glob1, glob2; ++extern int get (void); ++ ++ ++static void __attribute__ ((noinline)) ++foo (int a) ++{ ++ a = glob; ++ consume (a); ++ a = get (); ++ consume (a); ++ __asm__ volatile("" : : ""(a)); ++ consume (a); ++ ++ if (glob1) ++ a = glob1; ++ else ++ a = glob2; ++ consume (a); ++} ++ 
++int ++bar (int a) ++{ ++ foo (a); ++ glob = a; ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "replacing an SSA name of a removed param" 4 "eipa_sra" } } */ Index: gcc/testsuite/gcc.dg/pr67028.c =================================================================== --- a/src/gcc/testsuite/gcc.dg/pr67028.c (.../tags/gcc_5_2_0_release) @@ -22059,7 +22497,59 @@ =================================================================== --- a/src/gcc/testsuite/ChangeLog (.../tags/gcc_5_2_0_release) +++ b/src/gcc/testsuite/ChangeLog (.../branches/gcc-5-branch) -@@ -1,3 +1,797 @@ +@@ -1,3 +1,849 @@ ++2015-10-27 Kyrylo Tkachov ++ ++ Backport from mainline ++ 2015-10-26 Kyrylo Tkachov ++ ++ PR middle-end/67989 ++ * g++.dg/pr67989.C: New test. ++ ++2015-10-27 Kyrylo Tkachov ++ ++ PR target/67929 ++ * gcc.target/arm/pr67929_1.c: New test. ++ ++2015-10-26 Paul Thomas ++ ++ Backport from mainline r228940: ++ PR fortran/67177 ++ * gfortran.dg/move_alloc_15.f90: New test ++ * gfortran.dg/move_alloc_16.f90: New test ++ ++ PR fortran/67977 ++ * gfortran.dg/deferred_character_assignment_1.f90: New test ++ ++2015-10-26 Martin Jambor ++ ++ Backport from mainline ++ 2015-10-09 Martin Jambor ++ ++ * gcc.dg/ipa/ipa-sra-10.c: New test. ++ * gcc.dg/torture/pr67794.c: Likewise. ++ ++2015-10-22 Paul Thomas ++ ++ PR fortran/58754 ++ * gfortran.dg/pr58754.f90: New test ++ ++2015-10-22 Andreas Krebbel ++ ++ Backport from mainline ++ 2015-10-22 Andreas Krebbel ++ ++ PR target/68015 ++ * gcc.target/s390/pr68015.c: New test. ++ ++2015-10-22 Uros Bizjak ++ ++ Backport from mainline ++ 2015-10-21 Uros Bizjak ++ ++ PR target/68018 ++ * gcc.target/i386/pr68018.c: New test. ++ +2015-10-20 Szabolcs Nagy + + Backport from mainline r229024 @@ -22272,8 +22762,8 @@ + +2015-04-28 Kirill Yukhin + -+ * gcc.target/i386/avx512vbmi-check.h (main): Fix register -+ name while checking for AVX-512VBMI presence. ++ * gcc.target/i386/avx512vbmi-check.h (main): Fix register ++ name while checking for AVX-512VBMI presence. + +2015-09-25 Steven G. Kargl + @@ -22485,7 +22975,7 @@ + Backport from mainline + 2015-08-21 Dominik Vogt + -+ * gcc.target/s390/zvector/vec-load_bndry-1.c: New test. ++ * gcc.target/s390/zvector/vec-load_bndry-1.c: New test. + +2015-08-24 Michael Meissner + @@ -22622,9 +23112,9 @@ + backport from trunk: + 2015-07-21 Alex Velenko + -+ * gcc.target/arm/split-live-ranges-for-shrink-wrap.c (dg-skip-if): ++ * gcc.target/arm/split-live-ranges-for-shrink-wrap.c (dg-skip-if): + Skip -march=armv4t. -+ (dg-additional-options): Set armv5t flag. ++ (dg-additional-options): Set armv5t flag. + +2015-07-25 Tom de Vries + @@ -22857,7 +23347,7 @@ 2015-07-16 Release Manager * GCC 5.2.0 released. -@@ -792,7 +1586,7 @@ +@@ -792,7 +1638,7 @@ Add missing ChangeLog entry for r222341. 
Backport from trunk r222273 @@ -23062,6 +23552,38 @@ +int main() { + static_assert(A(42), ""); +} +Index: gcc/testsuite/g++.dg/cpp0x/anon-union1.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/cpp0x/anon-union1.C (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/g++.dg/cpp0x/anon-union1.C (.../branches/gcc-5-branch) +@@ -0,0 +1,27 @@ ++// PR c++/66583 ++// { dg-do run { target c++11 } } ++ ++template ++T&& move(T& t) { return static_cast(t); } ++ ++struct A { ++ A() { }; ++ A(const A&) { } ++}; ++ ++struct B { ++ union { ++ int m_1 = 0; ++ int m_2; ++ }; ++ A dummy; ++}; ++ ++int main() ++{ ++ B b; ++ b.m_1 = 1; ++ B c = move(b); ++ if (c.m_1 != 1) ++ __builtin_abort(); ++} Index: gcc/testsuite/g++.dg/cpp0x/alignas2.C =================================================================== --- a/src/gcc/testsuite/g++.dg/cpp0x/alignas2.C (.../tags/gcc_5_2_0_release) @@ -23136,6 +23658,86 @@ +void *operator new[](std::size_t, std::nothrow_t &) noexcept { + return vespamalloc::_GmemP->malloc(0); +} +Index: gcc/testsuite/g++.dg/pr67989.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/pr67989.C (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/g++.dg/pr67989.C (.../branches/gcc-5-branch) +@@ -0,0 +1,75 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c++11 -O2" } */ ++/* { dg-additional-options "-marm -march=armv4t" { target arm*-*-* } } */ ++ ++__extension__ typedef unsigned long long int uint64_t; ++namespace std __attribute__ ((__visibility__ ("default"))) ++{ ++ typedef enum memory_order ++ { ++ memory_order_seq_cst ++ } memory_order; ++} ++ ++namespace std __attribute__ ((__visibility__ ("default"))) ++{ ++ template < typename _Tp > struct atomic ++ { ++ static constexpr int _S_min_alignment ++ = (sizeof (_Tp) & (sizeof (_Tp) - 1)) || sizeof (_Tp) > 16 ++ ? 0 : sizeof (_Tp); ++ static constexpr int _S_alignment ++ = _S_min_alignment > alignof (_Tp) ? 
_S_min_alignment : alignof (_Tp); ++ alignas (_S_alignment) _Tp _M_i; ++ operator _Tp () const noexcept ++ { ++ return load (); ++ } ++ _Tp load (memory_order __m = memory_order_seq_cst) const noexcept ++ { ++ _Tp tmp; ++ __atomic_load (&_M_i, &tmp, __m); ++ } ++ }; ++} ++ ++namespace lldb_private ++{ ++ namespace imp ++ { ++ } ++ class Address; ++} ++namespace lldb ++{ ++ typedef uint64_t addr_t; ++ class SBSection ++ { ++ }; ++ class SBAddress ++ { ++ void SetAddress (lldb::SBSection section, lldb::addr_t offset); ++ lldb_private::Address & ref (); ++ }; ++} ++namespace lldb_private ++{ ++ class Address ++ { ++ public: ++ const Address & SetOffset (lldb::addr_t offset) ++ { ++ bool changed = m_offset != offset; ++ } ++ std::atomic < lldb::addr_t > m_offset; ++ }; ++} ++ ++using namespace lldb; ++using namespace lldb_private; ++void ++SBAddress::SetAddress (lldb::SBSection section, lldb::addr_t offset) ++{ ++ Address & addr = ref (); ++ addr.SetOffset (offset); ++} Index: gcc/testsuite/g++.dg/cpp1y/var-templ43.C =================================================================== --- a/src/gcc/testsuite/g++.dg/cpp1y/var-templ43.C (.../tags/gcc_5_2_0_release) @@ -23624,6 +24226,61 @@ + for (I i = x; i < y; ++i) // { dg-error "no match for" } + ; +} +Index: gcc/testsuite/g++.dg/init/elide3.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/init/elide3.C (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/testsuite/g++.dg/init/elide3.C (.../branches/gcc-5-branch) +@@ -0,0 +1,50 @@ ++// PR c++/67557 ++// { dg-do run } ++ ++namespace std ++{ ++ struct string ++ { ++ typedef unsigned long size_type; ++ const char* _M_p; ++ char _M_local_buf[1]; ++ ++ string(const char* s) : _M_p(_M_local_buf) ++ { ++ __builtin_printf("%p constructed\n", this); ++ } ++ ++ string(const string& s) : _M_p(_M_local_buf) ++ { ++ __builtin_printf("%p copied from %p\n", this, &s); ++ } ++ ++ ~string() ++ { ++ __builtin_printf("%p destroyed\n", this); ++ if (_M_p != _M_local_buf) ++ __builtin_abort(); ++ } ++ }; ++} ++ ++struct StartTag ++{ ++ explicit StartTag(std::string const & tag) : tag_(tag), keepempty_(false) {} ++ std::string tag_; ++ bool keepempty_; ++}; ++ ++StartTag fontToStartTag() { return StartTag(""); } ++ ++struct FontTag : public StartTag ++{ ++ FontTag() : StartTag(fontToStartTag()) {} ++}; ++ ++int main() ++{ ++ FontTag x; ++ __builtin_printf("%p x.tag_ in main()\n", &x.tag_); ++ return 0; ++} Index: gcc/testsuite/g++.dg/init/pr66857.C =================================================================== --- a/src/gcc/testsuite/g++.dg/init/pr66857.C (.../tags/gcc_5_2_0_release) @@ -23873,6 +24530,50 @@ + x += 16, y += x & 15, z += x & 15; + return x + y + z; +} +Index: gcc/cp/init.c +=================================================================== +--- a/src/gcc/cp/init.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/cp/init.c (.../branches/gcc-5-branch) +@@ -851,6 +851,18 @@ + return list; + } + ++/* Return the innermost aggregate scope for FIELD, whether that is ++ the enclosing class or an anonymous aggregate within it. */ ++ ++static tree ++innermost_aggr_scope (tree field) ++{ ++ if (ANON_AGGR_TYPE_P (TREE_TYPE (field))) ++ return TREE_TYPE (field); ++ else ++ return DECL_CONTEXT (field); ++} ++ + /* The MEM_INITS are a TREE_LIST. The TREE_PURPOSE of each list gives + a FIELD_DECL or BINFO in T that needs initialization. The + TREE_VALUE gives the initializer, or list of initializer arguments. 
+@@ -994,7 +1006,7 @@ + + /* See if this field is a member of a union, or a member of a + structure contained in a union, etc. */ +- for (ctx = DECL_CONTEXT (field); ++ for (ctx = innermost_aggr_scope (field); + !same_type_p (ctx, t); + ctx = TYPE_CONTEXT (ctx)) + if (TREE_CODE (ctx) == UNION_TYPE +@@ -1027,8 +1039,9 @@ + union { struct { int i; int j; }; }; + + initializing both `i' and `j' makes sense. */ +- ctx = common_enclosing_class (DECL_CONTEXT (field), +- DECL_CONTEXT (TREE_PURPOSE (*last_p))); ++ ctx = common_enclosing_class ++ (innermost_aggr_scope (field), ++ innermost_aggr_scope (TREE_PURPOSE (*last_p))); + + if (ctx && TREE_CODE (ctx) == UNION_TYPE) + { Index: gcc/cp/class.c =================================================================== --- a/src/gcc/cp/class.c (.../tags/gcc_5_2_0_release) @@ -24007,7 +24708,18 @@ =================================================================== --- a/src/gcc/cp/ChangeLog (.../tags/gcc_5_2_0_release) +++ b/src/gcc/cp/ChangeLog (.../branches/gcc-5-branch) -@@ -1,3 +1,128 @@ +@@ -1,3 +1,139 @@ ++2015-10-20 Jason Merrill ++ ++ PR c++/66583 ++ * init.c (innermost_aggr_scope): New. ++ (sort_mem_initializers): Use it. ++ ++ PR c++/67557 ++ * call.c (is_base_field_ref): New. ++ (unsafe_copy_elision_p): New. ++ (build_over_call): Use it. ++ +2015-09-12 Paolo Carlini + + Backport from mainline @@ -24385,6 +25097,61 @@ || (n = tree_to_shwi (num)) <= 0 || (int) n != n) { +Index: gcc/cp/call.c +=================================================================== +--- a/src/gcc/cp/call.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/cp/call.c (.../branches/gcc-5-branch) +@@ -7002,6 +7002,39 @@ + return r; + } + ++/* Return true iff T refers to a base field. */ ++ ++static bool ++is_base_field_ref (tree t) ++{ ++ STRIP_NOPS (t); ++ if (TREE_CODE (t) == ADDR_EXPR) ++ t = TREE_OPERAND (t, 0); ++ if (TREE_CODE (t) == COMPONENT_REF) ++ t = TREE_OPERAND (t, 1); ++ if (TREE_CODE (t) == FIELD_DECL) ++ return DECL_FIELD_IS_BASE (t); ++ return false; ++} ++ ++/* We can't elide a copy from a function returning by value to a base ++ subobject, as the callee might clobber tail padding. Return true iff this ++ could be that case. */ ++ ++static bool ++unsafe_copy_elision_p (tree target, tree exp) ++{ ++ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (exp)); ++ if (type == CLASSTYPE_AS_BASE (type)) ++ return false; ++ if (!is_base_field_ref (target) ++ && resolves_to_fixed_type_p (target, NULL)) ++ return false; ++ tree init = TARGET_EXPR_INITIAL (exp); ++ return (TREE_CODE (init) == AGGR_INIT_EXPR ++ && !AGGR_INIT_VIA_CTOR_P (init)); ++} ++ + /* Subroutine of the various build_*_call functions. Overload resolution + has chosen a winning candidate CAND; build up a CALL_EXPR accordingly. + ARGS is a TREE_LIST of the unconverted arguments to the call. 
FLAGS is a +@@ -7419,7 +7452,9 @@ + else if (trivial) + return force_target_expr (DECL_CONTEXT (fn), arg, complain); + } +- else if (TREE_CODE (arg) == TARGET_EXPR || trivial) ++ else if (trivial ++ || (TREE_CODE (arg) == TARGET_EXPR ++ && !unsafe_copy_elision_p (fa, arg))) + { + tree to = stabilize_reference (cp_build_indirect_ref (fa, RO_NULL, + complain)); Index: gcc/cp/cvt.c =================================================================== --- a/src/gcc/cp/cvt.c (.../tags/gcc_5_2_0_release) @@ -24460,6 +25227,29 @@ && TREE_CODE (BLOCK_SUPERCONTEXT (block)) == FUNCTION_DECL)) threshold /= 10; if (size > threshold) +Index: gcc/dojump.c +=================================================================== +--- a/src/gcc/dojump.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/dojump.c (.../branches/gcc-5-branch) +@@ -1217,12 +1217,12 @@ + If one side isn't, we want a noncanonicalized comparison. See PR + middle-end/17564. */ + if (HAVE_canonicalize_funcptr_for_compare +- && TREE_CODE (TREE_TYPE (treeop0)) == POINTER_TYPE +- && TREE_CODE (TREE_TYPE (TREE_TYPE (treeop0))) +- == FUNCTION_TYPE +- && TREE_CODE (TREE_TYPE (treeop1)) == POINTER_TYPE +- && TREE_CODE (TREE_TYPE (TREE_TYPE (treeop1))) +- == FUNCTION_TYPE) ++ && POINTER_TYPE_P (TREE_TYPE (treeop0)) ++ && POINTER_TYPE_P (TREE_TYPE (treeop1)) ++ && (TREE_CODE (TREE_TYPE (TREE_TYPE (treeop0))) == FUNCTION_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (treeop0))) == METHOD_TYPE) ++ && (TREE_CODE (TREE_TYPE (TREE_TYPE (treeop1))) == FUNCTION_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (treeop1))) == METHOD_TYPE)) + { + rtx new_op0 = gen_reg_rtx (mode); + rtx new_op1 = gen_reg_rtx (mode); Index: gcc/lto-cgraph.c =================================================================== --- a/src/gcc/lto-cgraph.c (.../tags/gcc_5_2_0_release) @@ -24882,7 +25672,26 @@ { /* Take care about non-array allocatable components here. The alloc_* routine below is motivated by the alloc_scalar_allocatable_for_ -@@ -8975,7 +9013,6 @@ +@@ -8634,6 +8672,7 @@ + tree jump_label1; + tree jump_label2; + gfc_se lse; ++ gfc_ref *ref; + + if (!expr1 || expr1->rank) + return; +@@ -8641,6 +8680,10 @@ + if (!expr2 || expr2->rank) + return; + ++ for (ref = expr1->ref; ref; ref = ref->next) ++ if (ref->type == REF_SUBSTRING) ++ return; ++ + realloc_lhs_warning (expr2->ts.type, false, &expr2->where); + + /* Since this is a scalar lhs, we can afford to do this. That is, +@@ -8975,7 +9018,6 @@ scalar_to_array = (expr2->ts.type == BT_DERIVED && expr2->ts.u.derived->attr.alloc_comp && !expr_is_variable (expr2) @@ -24890,7 +25699,7 @@ && expr1->rank && !expr2->rank); scalar_to_array |= (expr1->ts.type == BT_DERIVED && expr1->rank -@@ -8984,7 +9021,7 @@ +@@ -8984,7 +9026,7 @@ if (scalar_to_array && dealloc) { tmp = gfc_deallocate_alloc_comp_no_caf (expr2->ts.u.derived, rse.expr, 0); @@ -25256,7 +26065,31 @@ =================================================================== --- a/src/gcc/fortran/ChangeLog (.../tags/gcc_5_2_0_release) +++ b/src/gcc/fortran/ChangeLog (.../branches/gcc-5-branch) -@@ -1,3 +1,189 @@ +@@ -1,3 +1,213 @@ ++2015-10-26 Paul Thomas ++ ++ PR fortran/67177 ++ PR fortran/67977 ++ Backport from mainline r228940: ++ * primary.c (match_substring): Add an argument 'deferred' to ++ flag that a substring reference with null start and end should ++ not be optimized away for deferred length strings. ++ (match_string_constant, gfc_match_rvalue): Set the argument. ++ * trans-expr.c (alloc_scalar_allocatable_for_assignment): If ++ there is a substring reference return. 
++ * trans-intrinsic.c (conv_intrinsic_move_alloc): For deferred ++ characters, assign the 'from' string length to the 'to' string ++ length. If the 'from' expression is deferred, set its string ++ length to zero. If the 'to' expression has allocatable ++ components, deallocate them. ++ ++2015-10-22 Paul Thomas ++ ++ PR fortran/58754 ++ * trans-stmt.c (gfc_trans_allocate): Do not use the scalar ++ character assignment if the allocate expression is an array ++ descriptor. ++ +2015-10-19 Steven G. Kargl + + PR fortran/67900 @@ -25272,7 +26105,7 @@ + + PR fortran/67987 + * decl.c (char_len_param_value): Unwrap unlong line. If LEN < 0, -+ force it to zero per the Fortran 90, 95, 2003, and 2008 Standards. ++ force it to zero per the Fortran 90, 95, 2003, and 2008 Standards. + * resolve.c (gfc_resolve_substring_charlen): Unwrap unlong line. + If 'start' is larger than 'end', length of substring is negative, + so explicitly set it to zero. @@ -25446,7 +26279,7 @@ 2015-07-16 Release Manager * GCC 5.2.0 released. -@@ -94,7 +280,7 @@ +@@ -94,7 +304,7 @@ 2015-05-19 Steven G. Kargl PR fortran/66052 @@ -25455,7 +26288,7 @@ 2015-05-19 Steven G. Kargl -@@ -106,7 +292,7 @@ +@@ -106,7 +316,7 @@ PR fortran/66044 * decl.c(gfc_match_entry): Change a gfc_internal_error() into @@ -25486,6 +26319,16 @@ } } +@@ -5618,7 +5629,8 @@ + tmp = gfc_copy_class_to_class (expr3, to, + nelems, upoly_expr); + } +- else if (code->expr3->ts.type == BT_CHARACTER) ++ else if (code->expr3->ts.type == BT_CHARACTER ++ && !GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))) + { + tmp = INDIRECT_REF_P (se.expr) ? + se.expr : Index: gcc/fortran/expr.c =================================================================== --- a/src/gcc/fortran/expr.c (.../tags/gcc_5_2_0_release) @@ -26046,6 +26889,33 @@ =================================================================== --- a/src/gcc/fortran/primary.c (.../tags/gcc_5_2_0_release) +++ b/src/gcc/fortran/primary.c (.../branches/gcc-5-branch) +@@ -748,7 +748,7 @@ + /* Match a substring reference. */ + + static match +-match_substring (gfc_charlen *cl, int init, gfc_ref **result) ++match_substring (gfc_charlen *cl, int init, gfc_ref **result, bool deferred) + { + gfc_expr *start, *end; + locus old_loc; +@@ -800,7 +800,7 @@ + } + + /* Optimize away the (:) reference. */ +- if (start == NULL && end == NULL) ++ if (start == NULL && end == NULL && !deferred) + ref = NULL; + else + { +@@ -1098,7 +1098,7 @@ + if (ret != -1) + gfc_internal_error ("match_string_constant(): Delimiter not found"); + +- if (match_substring (NULL, 0, &e->ref) != MATCH_NO) ++ if (match_substring (NULL, 0, &e->ref, false) != MATCH_NO) + e->expr_type = EXPR_SUBSTRING; + + *result = e; @@ -1202,6 +1202,9 @@ return MATCH_ERROR; } @@ -26056,7 +26926,17 @@ if (!gfc_numeric_ts (&sym->value->ts)) { gfc_error ("Numeric PARAMETER required in complex constant at %C"); -@@ -2642,7 +2645,7 @@ +@@ -2078,7 +2081,8 @@ + + if (primary->ts.type == BT_CHARACTER) + { +- switch (match_substring (primary->ts.u.cl, equiv_flag, &substring)) ++ bool def = primary->ts.deferred == 1; ++ switch (match_substring (primary->ts.u.cl, equiv_flag, &substring, def)) + { + case MATCH_YES: + if (tail == NULL) +@@ -2642,7 +2646,7 @@ gfc_expr *e; gfc_symtree *symtree; @@ -26065,6 +26945,15 @@ e = gfc_get_expr (); e->symtree = symtree; +@@ -3091,7 +3095,7 @@ + that we're not sure is a variable yet. 
*/ + + if ((implicit_char || sym->ts.type == BT_CHARACTER) +- && match_substring (sym->ts.u.cl, 0, &e->ref) == MATCH_YES) ++ && match_substring (sym->ts.u.cl, 0, &e->ref, false) == MATCH_YES) + { + + e->expr_type = EXPR_VARIABLE; Index: gcc/fortran/trans-intrinsic.c =================================================================== --- a/src/gcc/fortran/trans-intrinsic.c (.../tags/gcc_5_2_0_release) @@ -26078,6 +26967,56 @@ fndecl = build_call_expr_loc (input_location, fndecl, 8, array, opr, opr_flags, image_index, stat, errmsg, strlen, errmsg_len); } +@@ -9360,6 +9360,16 @@ + } + } + ++ if (to_expr->ts.type == BT_CHARACTER && to_expr->ts.deferred) ++ { ++ gfc_add_modify_loc (input_location, &block, to_se.string_length, ++ fold_convert (TREE_TYPE (to_se.string_length), ++ from_se.string_length)); ++ if (from_expr->ts.deferred) ++ gfc_add_modify_loc (input_location, &block, from_se.string_length, ++ build_int_cst (TREE_TYPE (from_se.string_length), 0)); ++ } ++ + return gfc_finish_block (&block); + } + +@@ -9459,6 +9469,14 @@ + } + else + { ++ if (to_expr->ts.type == BT_DERIVED ++ && to_expr->ts.u.derived->attr.alloc_comp) ++ { ++ tmp = gfc_deallocate_alloc_comp (to_expr->ts.u.derived, ++ to_se.expr, to_expr->rank); ++ gfc_add_expr_to_block (&block, tmp); ++ } ++ + tmp = gfc_conv_descriptor_data_get (to_se.expr); + tmp = gfc_deallocate_with_status (tmp, NULL_TREE, NULL_TREE, NULL_TREE, + NULL_TREE, true, to_expr, false); +@@ -9473,6 +9491,17 @@ + gfc_add_modify_loc (input_location, &block, tmp, + fold_convert (TREE_TYPE (tmp), null_pointer_node)); + ++ ++ if (to_expr->ts.type == BT_CHARACTER && to_expr->ts.deferred) ++ { ++ gfc_add_modify_loc (input_location, &block, to_se.string_length, ++ fold_convert (TREE_TYPE (to_se.string_length), ++ from_se.string_length)); ++ if (from_expr->ts.deferred) ++ gfc_add_modify_loc (input_location, &block, from_se.string_length, ++ build_int_cst (TREE_TYPE (from_se.string_length), 0)); ++ } ++ + return gfc_finish_block (&block); + } + Index: gcc/fortran/simplify.c =================================================================== --- a/src/gcc/fortran/simplify.c (.../tags/gcc_5_2_0_release) @@ -27629,6 +28568,146 @@ cfun->has_local_explicit_reg_vars = false; /* Remove unmarked local and global vars from local_decls. */ +Index: gcc/tree-sra.c +=================================================================== +--- a/src/gcc/tree-sra.c (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/tree-sra.c (.../branches/gcc-5-branch) +@@ -4580,61 +4580,45 @@ + return NULL; + } + +-/* If the statement STMT defines an SSA_NAME of a parameter which is to be +- removed because its value is not used, replace the SSA_NAME with a one +- relating to a created VAR_DECL together all of its uses and return true. +- ADJUSTMENTS is a pointer to an adjustments vector. */ ++/* If OLD_NAME, which is being defined by statement STMT, is an SSA_NAME of a ++ parameter which is to be removed because its value is not used, create a new ++ SSA_NAME relating to a replacement VAR_DECL, replace all uses of the ++ original with it and return it. If there is no need to re-map, return NULL. ++ ADJUSTMENTS is a pointer to a vector of IPA-SRA adjustments. 
*/ + +-static bool +-replace_removed_params_ssa_names (gimple stmt, ++static tree ++replace_removed_params_ssa_names (tree old_name, gimple stmt, + ipa_parm_adjustment_vec adjustments) + { + struct ipa_parm_adjustment *adj; +- tree lhs, decl, repl, name; ++ tree decl, repl, new_name; + +- if (gimple_code (stmt) == GIMPLE_PHI) +- lhs = gimple_phi_result (stmt); +- else if (is_gimple_assign (stmt)) +- lhs = gimple_assign_lhs (stmt); +- else if (is_gimple_call (stmt)) +- lhs = gimple_call_lhs (stmt); +- else +- gcc_unreachable (); ++ if (TREE_CODE (old_name) != SSA_NAME) ++ return NULL; + +- if (TREE_CODE (lhs) != SSA_NAME) +- return false; +- +- decl = SSA_NAME_VAR (lhs); ++ decl = SSA_NAME_VAR (old_name); + if (decl == NULL_TREE + || TREE_CODE (decl) != PARM_DECL) +- return false; ++ return NULL; + + adj = get_adjustment_for_base (adjustments, decl); + if (!adj) +- return false; ++ return NULL; + + repl = get_replaced_param_substitute (adj); +- name = make_ssa_name (repl, stmt); ++ new_name = make_ssa_name (repl, stmt); + + if (dump_file) + { + fprintf (dump_file, "replacing an SSA name of a removed param "); +- print_generic_expr (dump_file, lhs, 0); ++ print_generic_expr (dump_file, old_name, 0); + fprintf (dump_file, " with "); +- print_generic_expr (dump_file, name, 0); ++ print_generic_expr (dump_file, new_name, 0); + fprintf (dump_file, "\n"); + } + +- if (is_gimple_assign (stmt)) +- gimple_assign_set_lhs (stmt, name); +- else if (is_gimple_call (stmt)) +- gimple_call_set_lhs (stmt, name); +- else +- gimple_phi_set_result (as_a (stmt), name); +- +- replace_uses_by (lhs, name); +- release_ssa_name (lhs); +- return true; ++ replace_uses_by (old_name, new_name); ++ return new_name; + } + + /* If the statement STMT contains any expressions that need to replaced with a +@@ -4713,7 +4697,16 @@ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- replace_removed_params_ssa_names (gsi_stmt (gsi), adjustments); ++ { ++ gphi *phi = as_a (gsi_stmt (gsi)); ++ tree new_lhs, old_lhs = gimple_phi_result (phi); ++ new_lhs = replace_removed_params_ssa_names (old_lhs, phi, adjustments); ++ if (new_lhs) ++ { ++ gimple_phi_set_result (phi, new_lhs); ++ release_ssa_name (old_lhs); ++ } ++ } + + gsi = gsi_start_bb (bb); + while (!gsi_end_p (gsi)) +@@ -4733,7 +4726,6 @@ + + case GIMPLE_ASSIGN: + modified |= sra_ipa_modify_assign (stmt, &gsi, adjustments); +- modified |= replace_removed_params_ssa_names (stmt, adjustments); + break; + + case GIMPLE_CALL: +@@ -4748,8 +4740,6 @@ + { + t = gimple_call_lhs_ptr (stmt); + modified |= ipa_modify_expr (t, false, adjustments); +- modified |= replace_removed_params_ssa_names (stmt, +- adjustments); + } + break; + +@@ -4773,6 +4763,20 @@ + break; + } + ++ def_operand_p defp; ++ ssa_op_iter iter; ++ FOR_EACH_SSA_DEF_OPERAND (defp, stmt, iter, SSA_OP_DEF) ++ { ++ tree old_def = DEF_FROM_PTR (defp); ++ if (tree new_def = replace_removed_params_ssa_names (old_def, stmt, ++ adjustments)) ++ { ++ SET_DEF (defp, new_def); ++ release_ssa_name (old_def); ++ modified = true; ++ } ++ } ++ + if (modified) + { + update_stmt (stmt); Index: gcc/ipa-prop.c =================================================================== --- a/src/gcc/ipa-prop.c (.../tags/gcc_5_2_0_release) @@ -547512,7 +548591,23 @@ =================================================================== --- a/src/gcc/config/s390/s390.md (.../tags/gcc_5_2_0_release) +++ b/src/gcc/config/s390/s390.md (.../branches/gcc-5-branch) -@@ -9594,7 +9594,7 @@ +@@ -6044,8 +6044,13 
@@ + (match_operand:GPR 2 "nonimmediate_operand" "") + (match_operand:GPR 3 "nonimmediate_operand" "")))] + "TARGET_Z196" +- "operands[1] = s390_emit_compare (GET_CODE (operands[1]), +- XEXP (operands[1], 0), XEXP (operands[1], 1));") ++{ ++ /* Emit the comparison insn in case we do not already have a comparison result. */ ++ if (!s390_comparison (operands[1], VOIDmode)) ++ operands[1] = s390_emit_compare (GET_CODE (operands[1]), ++ XEXP (operands[1], 0), ++ XEXP (operands[1], 1)); ++}) + + ; locr, loc, stoc, locgr, locg, stocg + (define_insn_and_split "*movcc" +@@ -9594,7 +9599,7 @@ { /* Unless this is a SEQ_CST fence, the s390 memory model is strong enough not to require barriers of any kind. */ @@ -547521,7 +548616,7 @@ { rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); MEM_VOLATILE_P (mem) = 1; -@@ -9675,7 +9675,7 @@ +@@ -9675,7 +9680,7 @@ (match_operand:SI 2 "const_int_operand")] ;; model "" { @@ -547530,7 +548625,7 @@ if (MEM_ALIGN (operands[0]) < GET_MODE_BITSIZE (GET_MODE (operands[0]))) FAIL; -@@ -9686,7 +9686,7 @@ +@@ -9686,7 +9691,7 @@ emit_insn (gen_atomic_storedi_1 (operands[0], operands[1])); else emit_move_insn (operands[0], operands[1]); @@ -548712,7 +549807,25 @@ return chkp_function_instrumented_p (current_function_decl); /* Complex values are returned in %st(0)/%st(1) pair. */ -@@ -10207,10 +10205,14 @@ +@@ -10104,11 +10102,14 @@ + frame->nregs = ix86_nsaved_regs (); + frame->nsseregs = ix86_nsaved_sseregs (); + +- /* 64-bit MS ABI seem to require stack alignment to be always 16 except for +- function prologues and leaf. */ ++ /* 64-bit MS ABI seem to require stack alignment to be always 16, ++ except for function prologues, leaf functions and when the defult ++ incoming stack boundary is overriden at command line or via ++ force_align_arg_pointer attribute. */ + if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128) + && (!crtl->is_leaf || cfun->calls_alloca != 0 +- || ix86_current_function_calls_tls_descriptor)) ++ || ix86_current_function_calls_tls_descriptor ++ || ix86_incoming_stack_boundary < 128)) + { + crtl->preferred_stack_boundary = 128; + crtl->stack_alignment_needed = 128; +@@ -10207,10 +10208,14 @@ if (frame->nsseregs) { /* The only ABI that has saved SSE registers (Win64) also has a @@ -548731,7 +549844,7 @@ offset += frame->nsseregs * 16; } frame->sse_reg_save_offset = offset; -@@ -10220,7 +10222,7 @@ +@@ -10220,7 +10225,7 @@ sure that no value happens to be the same before and after, force the alignment computation below to add a non-zero value. */ if (stack_realign_fp) @@ -548740,7 +549853,7 @@ /* Va-arg area */ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; -@@ -10433,15 +10435,24 @@ +@@ -10433,15 +10438,24 @@ { struct machine_function *m = cfun->machine; rtx reg = gen_rtx_REG (mode, regno); @@ -548768,7 +549881,7 @@ RTX_FRAME_RELATED_P (insn) = 1; base = addr; -@@ -10489,6 +10500,9 @@ +@@ -10489,6 +10503,9 @@ mem = gen_rtx_MEM (mode, addr); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg)); } @@ -548778,7 +549891,7 @@ } /* Emit code to save registers using MOV insns. -@@ -10705,6 +10719,25 @@ +@@ -10705,6 +10722,25 @@ } } @@ -548804,7 +549917,7 @@ /* Return minimum incoming stack alignment. */ static unsigned int -@@ -10719,7 +10752,6 @@ +@@ -10719,7 +10755,6 @@ if -mstackrealign is used, it isn't used for sibcall check and estimated stack alignment is 128bit. 
*/ else if (!sibcall @@ -548812,7 +549925,7 @@ && ix86_force_align_arg_pointer && crtl->stack_alignment_estimated == 128) incoming_stack_boundary = MIN_STACK_BOUNDARY; -@@ -11578,7 +11610,7 @@ +@@ -11578,7 +11613,7 @@ pointer is no longer valid. As for the value of sp_offset, see ix86_compute_frame_layout, which we need to match in order to pass verification of stack_pointer_offset at the end. */ @@ -548821,7 +549934,7 @@ m->fs.sp_valid = false; } -@@ -11991,12 +12023,27 @@ +@@ -11991,12 +12026,27 @@ { rtx reg = gen_rtx_REG (V4SFmode, regno); rtx mem; @@ -548851,7 +549964,7 @@ ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset); cfa_offset -= 16; -@@ -25140,7 +25187,8 @@ +@@ -25140,7 +25190,8 @@ dst = change_address (dst, BLKmode, destreg); set_mem_align (dst, desired_align * BITS_PER_UNIT); epilogue_size_needed = 0; @@ -548861,7 +549974,7 @@ { /* It is possible that we copied enough so the main loop will not execute. */ -@@ -25272,7 +25320,7 @@ +@@ -25272,7 +25323,7 @@ max_size -= align_bytes; } if (need_zero_guard @@ -548870,7 +549983,7 @@ && (count < (unsigned HOST_WIDE_INT) size_needed || (align_bytes == 0 && count < ((unsigned HOST_WIDE_INT) size_needed -@@ -25557,7 +25605,7 @@ +@@ -25557,7 +25608,7 @@ /* Avoid branch in fixing the byte. */ tmpreg = gen_lowpart (QImode, tmpreg); @@ -548879,7 +549992,7 @@ tmp = gen_rtx_REG (CCmode, FLAGS_REG); cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); -@@ -39522,53 +39570,41 @@ +@@ -39522,53 +39573,41 @@ return target; case IX86_BUILTIN_SBB32: @@ -548945,7 +550058,7 @@ op4 = expand_normal (arg3); if (!address_operand (op4, VOIDmode)) { -@@ -39575,8 +39611,17 @@ +@@ -39575,8 +39614,17 @@ op4 = convert_memory_address (Pmode, op4); op4 = copy_addr_to_reg (op4); } @@ -548964,7 +550077,7 @@ /* Return current CF value. */ if (target == 0) target = gen_reg_rtx (QImode); -@@ -39583,6 +39628,10 @@ +@@ -39583,6 +39631,10 @@ PUT_MODE (pat, QImode); emit_insn (gen_rtx_SET (VOIDmode, target, pat)); @@ -548975,7 +550088,7 @@ return target; case IX86_BUILTIN_READ_FLAGS: -@@ -46836,7 +46885,7 @@ +@@ -46836,7 +46888,7 @@ true }, /* force_align_arg_pointer says this function realigns the stack at entry. */ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, @@ -548984,7 +550097,7 @@ #if TARGET_DLLIMPORT_DECL_ATTRIBUTES { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, -@@ -48957,6 +49006,62 @@ +@@ -48957,6 +49009,62 @@ return true; } @@ -549047,7 +550160,7 @@ /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even and extract-odd permutations. */ -@@ -49059,6 +49164,9 @@ +@@ -49059,6 +49167,9 @@ case V32QImode: return expand_vec_perm_even_odd_pack (d); @@ -549057,7 +550170,7 @@ case V4DImode: if (!TARGET_AVX2) { -@@ -49520,6 +49628,8 @@ +@@ -49520,6 +49631,8 @@ /* Try sequences of four instructions. 
*/ @@ -549066,7 +550179,7 @@ if (expand_vec_perm_vpshufb2_vpermq (d)) return true; -@@ -50335,6 +50445,14 @@ +@@ -50335,6 +50448,14 @@ unsigned int size = INTVAL (operands[1]); unsigned int pos = INTVAL (operands[2]); @@ -549081,7 +550194,7 @@ if (GET_CODE (dst) == SUBREG) { pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; -@@ -50341,9 +50459,6 @@ +@@ -50341,9 +50462,6 @@ dst = SUBREG_REG (dst); } @@ -549091,7 +550204,7 @@ switch (GET_MODE (dst)) { case V16QImode: -@@ -50391,6 +50506,10 @@ +@@ -50391,6 +50509,10 @@ return false; } @@ -549102,7 +550215,7 @@ rtx d = dst; if (GET_MODE (dst) != dstmode) d = gen_reg_rtx (dstmode); -@@ -51516,7 +51635,7 @@ +@@ -51516,7 +51638,7 @@ static unsigned HOST_WIDE_INT ix86_memmodel_check (unsigned HOST_WIDE_INT val) { @@ -549111,7 +550224,7 @@ bool strong; if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE -@@ -51527,14 +51646,14 @@ +@@ -51527,14 +51649,14 @@ "Unknown architecture specific memory model"); return MEMMODEL_SEQ_CST; } @@ -549129,7 +550242,7 @@ { warning (OPT_Winvalid_memory_model, "HLE_RELEASE not used with RELEASE or stronger memory model"); -@@ -52307,9 +52426,6 @@ +@@ -52307,9 +52429,6 @@ #undef TARGET_CAN_INLINE_P #define TARGET_CAN_INLINE_P ix86_can_inline_p @@ -551047,7 +552160,57 @@ *cost = COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->fp[mode != SFmode].neg; -@@ -27678,8 +27685,8 @@ +@@ -27537,25 +27544,36 @@ + return 0; + } + ++/* If X is a CONST_DOUBLE with a value that is a power of 2 whose ++ log2 is in [1, 32], return that log2. Otherwise return -1. ++ This is used in the patterns for vcvt.s32.f32 floating-point to ++ fixed-point conversions. */ ++ + int +-vfp3_const_double_for_bits (rtx operand) ++vfp3_const_double_for_bits (rtx x) + { +- REAL_VALUE_TYPE r0; ++ if (!CONST_DOUBLE_P (x)) ++ return -1; + +- if (!CONST_DOUBLE_P (operand)) +- return 0; ++ REAL_VALUE_TYPE r; + +- REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); +- if (exact_real_truncate (DFmode, &r0)) +- { +- HOST_WIDE_INT value = real_to_integer (&r0); +- value = value & 0xffffffff; +- if ((value != 0) && ( (value & (value - 1)) == 0)) +- return int_log2 (value); +- } ++ REAL_VALUE_FROM_CONST_DOUBLE (r, x); ++ if (REAL_VALUE_NEGATIVE (r) ++ || REAL_VALUE_ISNAN (r) ++ || REAL_VALUE_ISINF (r) ++ || !real_isinteger (&r, SFmode)) ++ return -1; + +- return 0; ++ HOST_WIDE_INT hwint = exact_log2 (real_to_integer (&r)); ++ ++ /* The exact_log2 above will have returned -1 if this is ++ not an exact log2. */ ++ if (!IN_RANGE (hwint, 1, 32)) ++ return -1; ++ ++ return hwint; + } ++ + + /* Emit a memory barrier around an atomic sequence according to MODEL. */ + +@@ -27678,8 +27696,8 @@ promote succ to ACQ_REL so that we don't lose the acquire semantics. */ if (TARGET_HAVE_LDACQ @@ -551058,7 +552221,7 @@ mod_s = GEN_INT (MEMMODEL_ACQ_REL); switch (mode) -@@ -27752,21 +27759,26 @@ +@@ -27752,21 +27770,26 @@ oldval = operands[2]; newval = operands[3]; is_weak = (operands[4] != const0_rtx); @@ -551094,7 +552257,7 @@ /* Checks whether a barrier is needed and emits one accordingly. */ if (!(use_acquire || use_release)) arm_pre_atomic_barrier (mod_s); -@@ -27803,14 +27815,15 @@ +@@ -27803,14 +27826,15 @@ emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); } @@ -551113,7 +552276,7 @@ emit_label (label2); } -@@ -27818,22 +27831,27 @@ +@@ -27818,22 +27842,27 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, rtx value, rtx model_rtx, rtx cond) { @@ -551148,7 +552311,7 @@ /* Checks whether a barrier is needed and emits one accordingly. 
*/ if (!(use_acquire || use_release)) arm_pre_atomic_barrier (model); -@@ -27904,7 +27922,8 @@ +@@ -27904,7 +27933,8 @@ emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label)); /* Checks whether a barrier is needed and emits one accordingly. */ @@ -551158,7 +552321,7 @@ arm_post_atomic_barrier (model); } -@@ -28792,7 +28811,39 @@ +@@ -28792,7 +28822,39 @@ #undef BRANCH } @@ -551265,6 +552428,29 @@ (and (match_code "const_int") (match_test "(ival & 0xffff0000) == 0"))))) +@@ -338,7 +339,8 @@ + "@internal + In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32 with bits operation" + (and (match_code "const_double") +- (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_bits (op)"))) ++ (match_test "TARGET_32BIT && TARGET_VFP ++ && vfp3_const_double_for_bits (op) > 0"))) + + (define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" + "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") +Index: gcc/config/arm/predicates.md +=================================================================== +--- a/src/gcc/config/arm/predicates.md (.../tags/gcc_5_2_0_release) ++++ b/src/gcc/config/arm/predicates.md (.../branches/gcc-5-branch) +@@ -668,7 +668,7 @@ + (define_predicate "const_double_vcvt_power_of_two" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP +- && vfp3_const_double_for_bits (op)"))) ++ && vfp3_const_double_for_bits (op) > 0"))) + + (define_predicate "neon_struct_operand" + (and (match_code "mem") Index: gcc/config/arm/sync.md =================================================================== --- a/src/gcc/config/arm/sync.md (.../tags/gcc_5_2_0_release) @@ -553691,6 +554877,45 @@ #endif /* __MINGW32__ */ +Index: libffi/ChangeLog +=================================================================== +--- a/src/libffi/ChangeLog (.../tags/gcc_5_2_0_release) ++++ b/src/libffi/ChangeLog (.../branches/gcc-5-branch) +@@ -1,3 +1,9 @@ ++2015-10-26 John David Anglin ++ ++ PR libffi/65441 ++ * testsuite/lib/libffi.exp: Load target-supports-dg.exp. ++ * testsuite/libffi.call/float2.c: Don't run on hppa*-*-hpux*. ++ + 2015-07-16 Release Manager + + * GCC 5.2.0 released. +Index: libffi/testsuite/libffi.call/float2.c +=================================================================== +--- a/src/libffi/testsuite/libffi.call/float2.c (.../tags/gcc_5_2_0_release) ++++ b/src/libffi/testsuite/libffi.call/float2.c (.../branches/gcc-5-branch) +@@ -3,7 +3,7 @@ + Limitations: none. + PR: none. + Originator: From the original ffitest.c */ +-/* { dg-do run } */ ++/* { dg-do run { target { ! 
hppa*-*-hpux* } } } */ + + #include "ffitest.h" + #include "float.h" +Index: libffi/testsuite/lib/libffi.exp +=================================================================== +--- a/src/libffi/testsuite/lib/libffi.exp (.../tags/gcc_5_2_0_release) ++++ b/src/libffi/testsuite/lib/libffi.exp (.../branches/gcc-5-branch) +@@ -24,6 +24,7 @@ + load_lib dg.exp + load_lib libgloss.exp + load_gcc_lib target-supports.exp ++load_gcc_lib target-supports-dg.exp + load_gcc_lib target-libpath.exp + load_gcc_lib wrapper.exp + Index: libcpp/po/nl.po =================================================================== --- a/src/libcpp/po/nl.po (.../tags/gcc_5_2_0_release) diff -u gcc-5-5.2.1/debian/rules.conf gcc-5-5.2.1/debian/rules.conf --- gcc-5-5.2.1/debian/rules.conf +++ gcc-5-5.2.1/debian/rules.conf @@ -207,12 +207,12 @@ BINUTILSBDV = 2.22 ifneq (,$(filter $(distrelease),vivid)) BINUTILSBDV = 2.25-3~ - endif - ifneq (,$(filter $(distrelease),jessie sid)) + else ifneq (,$(filter $(distrelease),jessie sid)) BINUTILSBDV = 2.25-7~ + else ifneq (,$(filter $(distrelease),xenial)) + BINUTILSBDV = 2.25.51.20151028 endif endif -BINUTILSBDV = 2.25.51.20151020-1~ ifeq ($(DEB_CROSS),yes) BINUTILS_BUILD_DEP = binutils$(TS) (>= $(BINUTILSBDV)), binutils-multiarch (>= $(BINUTILSBDV)) BINUTILSV := $(shell dpkg -l binutils$(TS) \ @@ -483,7 +483,7 @@ # try to build with itself, or with the last version ifneq (,$(filter $(distrelease), jessie)) gnat_build_dep := gnat-4.9 [$(ada_no_archs)], g++-4.9 -else ifneq (,$(filter $(distrelease), stretch sid wheezy precise trusty wily)) +else ifneq (,$(filter $(distrelease), stretch sid wheezy precise trusty wily xenial)) gnat_build_dep := gnat-5 [$(ada_no_archs)], g++-5 else ifneq (,$(filter $(distrelease), squeeze lucid)) gnat_build_dep := diff -u gcc-5-5.2.1/debian/rules.d/binary-ada.mk gcc-5-5.2.1/debian/rules.d/binary-ada.mk --- gcc-5-5.2.1/debian/rules.d/binary-ada.mk +++ gcc-5-5.2.1/debian/rules.d/binary-ada.mk @@ -393,8 +393,9 @@ mv $(d_gnat)/usr/share/ada/debian_packaging.mk \ $(d_gnat)/usr/share/ada/debian_packaging-$(GNAT_VERSION).mk endif - dh_link -p$(p_gnat) usr/bin/$(cmd_prefix)gcc$(pkg_ver) usr/bin/$(cmd_prefix)gnatgcc$(pkg_ver) - dh_link -p$(p_gnat) usr/share/man/man1/$(cmd_prefix)gnat$(pkg_ver).1.gz usr/share/man/man1/$(cmd_prefix)gnatgcc$(pkg_ver).1.gz + : # keep this one unversioned, see Debian #802838. 
+ dh_link -p$(p_gnat) usr/bin/$(cmd_prefix)gcc$(pkg_ver) usr/bin/$(cmd_prefix)gnatgcc + dh_link -p$(p_gnat) usr/share/man/man1/$(cmd_prefix)gcc$(pkg_ver).1.gz usr/share/man/man1/$(cmd_prefix)gnatgcc.1.gz debian/dh_rmemptydirs -p$(p_gnat) diff -u gcc-5-5.2.1/debian/rules.defs gcc-5-5.2.1/debian/rules.defs --- gcc-5-5.2.1/debian/rules.defs +++ gcc-5-5.2.1/debian/rules.defs @@ -352,9 +352,9 @@ # build using fsf or linaro ifeq ($(distribution),Ubuntu) ifeq (,$(findstring gnat, $(PKGSOURCE))) - #ifneq (,$(findstring $(DEB_TARGET_ARCH),arm64 armel armhf)) - # with_linaro_branch = yes - #endif + ifneq (,$(findstring $(DEB_TARGET_ARCH),arm64 armel armhf)) + with_linaro_branch = yes + endif endif endif @@ -760,7 +760,7 @@ ifeq (,$(java_cpu)) java_cpu = $(DEB_TARGET_ARCH_CPU) endif - java_priority = 10$(subst .,,$(BASE_VERSION)) + java_priority = 10$(subst .,,$(BASE_VERSION))0 with_libgcj := yes with_libgcjbc := no diff -u gcc-5-5.2.1/debian/rules.parameters gcc-5-5.2.1/debian/rules.parameters --- gcc-5-5.2.1/debian/rules.parameters +++ gcc-5-5.2.1/debian/rules.parameters @@ -2,14 +2,14 @@ GCC_VERSION := 5.2.1 NEXT_GCC_VERSION := 5.2.2 BASE_VERSION := 5 -SOURCE_VERSION := 5.2.1-22ubuntu3 -DEB_VERSION := 5.2.1-22ubuntu3 -DEB_EVERSION := 1:5.2.1-22ubuntu3 -DEB_GDC_VERSION := 5.2.1-22ubuntu3 +SOURCE_VERSION := 5.2.1-23ubuntu1~15.10 +DEB_VERSION := 5.2.1-23ubuntu1~15.10 +DEB_EVERSION := 1:5.2.1-23ubuntu1~15.10 +DEB_GDC_VERSION := 5.2.1-23ubuntu1~15.10 DEB_SOVERSION := 5 DEB_SOEVERSION := 1:5 DEB_LIBGCC_SOVERSION := -DEB_LIBGCC_VERSION := 1:5.2.1-22ubuntu3 +DEB_LIBGCC_VERSION := 1:5.2.1-23ubuntu1~15.10 DEB_STDCXX_SOVERSION := 5 DEB_GCJ_SOVERSION := 5 PKG_GCJ_EXT := 16 diff -u gcc-5-5.2.1/debian/rules.patch gcc-5-5.2.1/debian/rules.patch --- gcc-5-5.2.1/debian/rules.patch +++ gcc-5-5.2.1/debian/rules.patch @@ -87,9 +87,10 @@ go-escape-analysis6 \ gccgo-sendfile-fix \ pr66368 \ - pr67280 \ + $(if $(with_linaro_branch),,pr67280) \ pr67508 \ pr67590 \ + pr67736 \ # this is still needed on powerpc, e.g. firefox and insighttoolkit4 will ftbfs. ifneq (,$(filter $(DEB_TARGET_ARCH),powerpc)) @@ -248,7 +249,7 @@ debian_patches = endif -debian_patches += gcc-sysroot +debian_patches += $(if $(with_linaro_branch),,gcc-sysroot) debian_patches += \ sys-auxv-header \ libcilkrts-targets \ @@ -317,9 +318,6 @@ ifeq ($(DEB_TARGET_ARCH),mipsel) debian_patches += mips-fix-loongson2f-nop endif -ifneq (,$(filter $(DEB_TARGET_ARCH), mips64 mips64el)) - debian_patches += pr67736 -endif debian_patches += libgomp-kfreebsd-testsuite debian_patches += go-testsuite diff -u gcc-5-5.2.1/debian/rules2 gcc-5-5.2.1/debian/rules2 --- gcc-5-5.2.1/debian/rules2 +++ gcc-5-5.2.1/debian/rules2 @@ -399,12 +399,12 @@ ifneq (,$(findstring powerpc64le-linux,$(DEB_TARGET_GNU_TYPE))) CONFARGS += --enable-secureplt - ifneq (,$(filter $(distrelease),jessie trusty utopic vivid)) + ifneq (,$(filter $(distrelease),jessie trusty utopic vivid wily)) CONFARGS += --with-cpu=power7 --with-tune=power8 else CONFARGS += --with-cpu=power8 endif - ifneq (,$(filter $(distrelease),jessie stretch sid trusty utopic vivid wily)) + ifneq (,$(filter $(distrelease),jessie stretch sid trusty utopic vivid wily xenial)) CONFARGS += --enable-targets=powerpcle-linux endif CONFARGS += --disable-multilib